From e7a972e113ddf3271c4c0e01a2d57e23ac5195f1 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Wed, 20 Jul 2011 16:05:05 +0100 Subject: [PATCH 01/18] simple_idct: add 10-bit version Signed-off-by: Mans Rullgard --- libavcodec/alpha/dsputil_alpha.c | 2 +- libavcodec/arm/dsputil_init_arm.c | 2 +- libavcodec/arm/dsputil_init_armv5te.c | 5 +- libavcodec/arm/dsputil_init_armv6.c | 5 +- libavcodec/arm/dsputil_init_neon.c | 2 +- libavcodec/bfin/dsputil_bfin.c | 22 +- libavcodec/dct-test.c | 2 +- libavcodec/dsputil.c | 15 +- libavcodec/ppc/dsputil_ppc.c | 2 +- libavcodec/ps2/dsputil_mmi.c | 3 +- libavcodec/sh4/dsputil_sh4.c | 3 +- libavcodec/simple_idct.c | 338 +--------------------- libavcodec/simple_idct.h | 11 +- libavcodec/simple_idct_template.c | 401 ++++++++++++++++++++++++++ libavcodec/sparc/dsputil_vis.c | 3 +- libavcodec/vc1.c | 4 +- libavcodec/x86/dsputil_mmx.c | 2 +- 17 files changed, 461 insertions(+), 361 deletions(-) create mode 100644 libavcodec/simple_idct_template.c diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index 32bb0fc932..039608b9ed 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ -335,7 +335,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) put_pixels_clamped_axp_p = c->put_pixels_clamped; add_pixels_clamped_axp_p = c->add_pixels_clamped; - if (!avctx->lowres && + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) { c->idct_put = ff_simple_idct_put_axp; diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 777a2f954e..5b4b24a43e 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -80,7 +80,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; - if (!avctx->lowres) { + if (!avctx->lowres && 
avctx->bits_per_raw_sample <= 8) { if(avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_ARM){ c->idct_put = j_rev_dct_arm_put; diff --git a/libavcodec/arm/dsputil_init_armv5te.c b/libavcodec/arm/dsputil_init_armv5te.c index 572e06cf36..e0224dabfb 100644 --- a/libavcodec/arm/dsputil_init_armv5te.c +++ b/libavcodec/arm/dsputil_init_armv5te.c @@ -29,8 +29,9 @@ void ff_prefetch_arm(void *mem, int stride, int h); void av_cold ff_dsputil_init_armv5te(DSPContext* c, AVCodecContext *avctx) { - if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && + (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { c->idct_put = ff_simple_idct_put_armv5te; c->idct_add = ff_simple_idct_add_armv5te; c->idct = ff_simple_idct_armv5te; diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index 7584aeefc6..d442415aec 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -74,8 +74,9 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) { const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; - if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && + (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { c->idct_put = ff_simple_idct_put_armv6; c->idct_add = ff_simple_idct_add_armv6; c->idct = ff_simple_idct_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 3f58dea9cd..ce45caf53e 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -177,7 +177,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) { const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && 
avctx->bits_per_raw_sample > 8; - if (!avctx->lowres) { + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) { if (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLENEON) { c->idct_put = ff_simple_idct_put_neon; diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c index 0db2d8baf8..d06bd8e4fd 100644 --- a/libavcodec/bfin/dsputil_bfin.c +++ b/libavcodec/bfin/dsputil_bfin.c @@ -256,16 +256,18 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) if (avctx->dct_algo == FF_DCT_AUTO) c->fdct = ff_bfin_fdct; - if (avctx->idct_algo==FF_IDCT_VP3) { - c->idct_permutation_type = FF_NO_IDCT_PERM; - c->idct = ff_bfin_vp3_idct; - c->idct_add = ff_bfin_vp3_idct_add; - c->idct_put = ff_bfin_vp3_idct_put; - } else if (avctx->idct_algo == FF_IDCT_AUTO) { - c->idct_permutation_type = FF_NO_IDCT_PERM; - c->idct = ff_bfin_idct; - c->idct_add = bfin_idct_add; - c->idct_put = bfin_idct_put; + if (avctx->bits_per_raw_sample <= 8) { + if (avctx->idct_algo == FF_IDCT_VP3) { + c->idct_permutation_type = FF_NO_IDCT_PERM; + c->idct = ff_bfin_vp3_idct; + c->idct_add = ff_bfin_vp3_idct_add; + c->idct_put = ff_bfin_vp3_idct_put; + } else if (avctx->idct_algo == FF_IDCT_AUTO) { + c->idct_permutation_type = FF_NO_IDCT_PERM; + c->idct = ff_bfin_idct; + c->idct_add = bfin_idct_add; + c->idct_put = bfin_idct_put; + } } } diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index dbf9639c1e..136f5c4742 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -111,7 +111,7 @@ static const struct algo idct_tab[] = { { "FAANI", ff_faanidct, NO_PERM }, { "REF-DBL", ff_ref_idct, NO_PERM }, { "INT", j_rev_dct, MMX_PERM }, - { "SIMPLE-C", ff_simple_idct, NO_PERM }, + { "SIMPLE-C", ff_simple_idct_8, NO_PERM }, #if HAVE_MMX #if CONFIG_GPL diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index e2cf708940..4b26f68387 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2237,7 +2237,7 @@ static int 
quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); s->dct_unquantize_inter(s, temp, 0, s->qscale); - ff_simple_idct(temp); //FIXME + ff_simple_idct_8(temp); //FIXME for(i=0; i<64; i++) sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); @@ -2878,6 +2878,12 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct = j_rev_dct1; c->idct_permutation_type= FF_NO_IDCT_PERM; }else{ + if (avctx->bits_per_raw_sample == 10) { + c->idct_put = ff_simple_idct_put_10; + c->idct_add = ff_simple_idct_add_10; + c->idct = ff_simple_idct_10; + c->idct_permutation_type = FF_NO_IDCT_PERM; + } else { if(avctx->idct_algo==FF_IDCT_INT){ c->idct_put= ff_jref_idct_put; c->idct_add= ff_jref_idct_add; @@ -2908,11 +2914,12 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct_put = ff_bink_idct_put_c; c->idct_permutation_type = FF_NO_IDCT_PERM; }else{ //accurate/default - c->idct_put= ff_simple_idct_put; - c->idct_add= ff_simple_idct_add; - c->idct = ff_simple_idct; + c->idct_put = ff_simple_idct_put_8; + c->idct_add = ff_simple_idct_add_8; + c->idct = ff_simple_idct_8; c->idct_permutation_type= FF_NO_IDCT_PERM; } + } } c->get_pixels = get_pixels_c; diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 3bf410c2ca..c1f68fc5f7 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -178,7 +178,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) } #endif //CONFIG_ENCODERS - if (avctx->lowres==0) { + if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) { if ((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_ALTIVEC)) { c->idct_put = idct_put_altivec; diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c index f4503a9030..585709679b 100644 --- a/libavcodec/ps2/dsputil_mmi.c +++ b/libavcodec/ps2/dsputil_mmi.c @@ -156,7 +156,8 @@ void dsputil_init_mmi(DSPContext* 
c, AVCodecContext *avctx) c->get_pixels = get_pixels_mmi; - if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){ + if (avctx->bits_per_raw_sample <= 8 && + (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2)) { c->idct_put= ff_mmi_idct_put; c->idct_add= ff_mmi_idct_add; c->idct = ff_mmi_idct; diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c index 9ea48ad4a1..bf58a9cbbd 100644 --- a/libavcodec/sh4/dsputil_sh4.c +++ b/libavcodec/sh4/dsputil_sh4.c @@ -97,7 +97,8 @@ void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx) if (!high_bit_depth) c->clear_blocks = clear_blocks_sh4; - if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){ + if (avctx->bits_per_raw_sample <= 8 && + (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4)) { c->idct_put = idct_put; c->idct_add = idct_add; c->idct = idct_sh4; diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index 0ffbfcaf33..b62658bdb3 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -25,339 +25,19 @@ * simpleidct in C. 
*/ -/* - based upon some outcommented c code from mpeg2dec (idct_mmx.c - written by Aaron Holtzman ) - */ - #include "libavutil/intreadwrite.h" #include "avcodec.h" #include "dsputil.h" #include "mathops.h" #include "simple_idct.h" -#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 -#define ROW_SHIFT 11 -#define COL_SHIFT 20 // 6 - -static inline void idctRowCondDC (DCTELEM * row) -{ - int a0, a1, a2, a3, b0, b1, b2, b3; - -#if HAVE_FAST_64BIT -#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) - if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { - uint64_t temp = (row[0] << 3) & 0xffff; - temp += temp << 16; - temp += temp << 32; - ((uint64_t *)row)[0] = temp; - ((uint64_t *)row)[1] = temp; - return; - } -#else - if (!(((uint32_t*)row)[1] | - ((uint32_t*)row)[2] | - ((uint32_t*)row)[3] | - row[1])) { - uint32_t temp = (row[0] << 3) & 0xffff; - temp += temp << 16; - ((uint32_t*)row)[0]=((uint32_t*)row)[1] = - ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; - return; - } -#endif - - a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); - a1 = a0; - a2 = a0; - a3 = a0; - - /* no need to optimize : gcc does it */ - a0 += W2 * row[2]; - a1 += W6 * row[2]; - a2 -= W6 * row[2]; - a3 -= W2 * row[2]; - - b0 = MUL16(W1, row[1]); - MAC16(b0, W3, row[3]); - b1 = MUL16(W3, row[1]); - MAC16(b1, -W7, row[3]); - b2 = MUL16(W5, row[1]); - MAC16(b2, -W1, row[3]); - b3 = MUL16(W7, row[1]); - MAC16(b3, -W5, row[3]); - - if (AV_RN64A(row + 4)) { - a0 += W4*row[4] + W6*row[6]; - a1 += - W4*row[4] - W2*row[6]; - a2 += - W4*row[4] + W2*row[6]; - a3 += W4*row[4] - W6*row[6]; - - MAC16(b0, W5, row[5]); - MAC16(b0, W7, row[7]); - - 
MAC16(b1, -W1, row[5]); - MAC16(b1, -W5, row[7]); - - MAC16(b2, W7, row[5]); - MAC16(b2, W3, row[7]); - - MAC16(b3, W3, row[5]); - MAC16(b3, -W1, row[7]); - } - - row[0] = (a0 + b0) >> ROW_SHIFT; - row[7] = (a0 - b0) >> ROW_SHIFT; - row[1] = (a1 + b1) >> ROW_SHIFT; - row[6] = (a1 - b1) >> ROW_SHIFT; - row[2] = (a2 + b2) >> ROW_SHIFT; - row[5] = (a2 - b2) >> ROW_SHIFT; - row[3] = (a3 + b3) >> ROW_SHIFT; - row[4] = (a3 - b3) >> ROW_SHIFT; -} - -static inline void idctSparseColPut (uint8_t *dest, int line_size, - DCTELEM * col) -{ - int a0, a1, a2, a3, b0, b1, b2, b3; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - /* XXX: I did that only to give same values as previous code */ - a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); - a1 = a0; - a2 = a0; - a3 = a0; - - a0 += + W2*col[8*2]; - a1 += + W6*col[8*2]; - a2 += - W6*col[8*2]; - a3 += - W2*col[8*2]; - - b0 = MUL16(W1, col[8*1]); - b1 = MUL16(W3, col[8*1]); - b2 = MUL16(W5, col[8*1]); - b3 = MUL16(W7, col[8*1]); - - MAC16(b0, + W3, col[8*3]); - MAC16(b1, - W7, col[8*3]); - MAC16(b2, - W1, col[8*3]); - MAC16(b3, - W5, col[8*3]); - - if(col[8*4]){ - a0 += + W4*col[8*4]; - a1 += - W4*col[8*4]; - a2 += - W4*col[8*4]; - a3 += + W4*col[8*4]; - } - - if (col[8*5]) { - MAC16(b0, + W5, col[8*5]); - MAC16(b1, - W1, col[8*5]); - MAC16(b2, + W7, col[8*5]); - MAC16(b3, + W3, col[8*5]); - } - - if(col[8*6]){ - a0 += + W6*col[8*6]; - a1 += - W2*col[8*6]; - a2 += + W2*col[8*6]; - a3 += - W6*col[8*6]; - } - - if (col[8*7]) { - MAC16(b0, + W7, col[8*7]); - MAC16(b1, - W5, col[8*7]); - MAC16(b2, + W3, col[8*7]); - MAC16(b3, - W1, col[8*7]); - } - - dest[0] = cm[(a0 + b0) >> COL_SHIFT]; - dest += line_size; - dest[0] = cm[(a1 + b1) >> COL_SHIFT]; - dest += line_size; - dest[0] = cm[(a2 + b2) >> COL_SHIFT]; - dest += line_size; - dest[0] = cm[(a3 + b3) >> COL_SHIFT]; - dest += line_size; - dest[0] = cm[(a3 - b3) >> COL_SHIFT]; - dest += line_size; - dest[0] = cm[(a2 - b2) >> COL_SHIFT]; - dest += line_size; - dest[0] = cm[(a1 - b1) >> 
COL_SHIFT]; - dest += line_size; - dest[0] = cm[(a0 - b0) >> COL_SHIFT]; -} - -static inline void idctSparseColAdd (uint8_t *dest, int line_size, - DCTELEM * col) -{ - int a0, a1, a2, a3, b0, b1, b2, b3; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - /* XXX: I did that only to give same values as previous code */ - a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); - a1 = a0; - a2 = a0; - a3 = a0; - - a0 += + W2*col[8*2]; - a1 += + W6*col[8*2]; - a2 += - W6*col[8*2]; - a3 += - W2*col[8*2]; - - b0 = MUL16(W1, col[8*1]); - b1 = MUL16(W3, col[8*1]); - b2 = MUL16(W5, col[8*1]); - b3 = MUL16(W7, col[8*1]); +#define BIT_DEPTH 8 +#include "simple_idct_template.c" +#undef BIT_DEPTH - MAC16(b0, + W3, col[8*3]); - MAC16(b1, - W7, col[8*3]); - MAC16(b2, - W1, col[8*3]); - MAC16(b3, - W5, col[8*3]); - - if(col[8*4]){ - a0 += + W4*col[8*4]; - a1 += - W4*col[8*4]; - a2 += - W4*col[8*4]; - a3 += + W4*col[8*4]; - } - - if (col[8*5]) { - MAC16(b0, + W5, col[8*5]); - MAC16(b1, - W1, col[8*5]); - MAC16(b2, + W7, col[8*5]); - MAC16(b3, + W3, col[8*5]); - } - - if(col[8*6]){ - a0 += + W6*col[8*6]; - a1 += - W2*col[8*6]; - a2 += + W2*col[8*6]; - a3 += - W6*col[8*6]; - } - - if (col[8*7]) { - MAC16(b0, + W7, col[8*7]); - MAC16(b1, - W5, col[8*7]); - MAC16(b2, + W3, col[8*7]); - MAC16(b3, - W1, col[8*7]); - } - - dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; - dest += line_size; - dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; - dest += line_size; - dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; - dest += line_size; - dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; - dest += line_size; - dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; - dest += line_size; - dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; - dest += line_size; - dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; - dest += line_size; - dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; -} - -static inline void idctSparseCol (DCTELEM * col) -{ - int a0, a1, a2, a3, b0, b1, b2, b3; - - /* XXX: I did that only to 
give same values as previous code */ - a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); - a1 = a0; - a2 = a0; - a3 = a0; - - a0 += + W2*col[8*2]; - a1 += + W6*col[8*2]; - a2 += - W6*col[8*2]; - a3 += - W2*col[8*2]; - - b0 = MUL16(W1, col[8*1]); - b1 = MUL16(W3, col[8*1]); - b2 = MUL16(W5, col[8*1]); - b3 = MUL16(W7, col[8*1]); - - MAC16(b0, + W3, col[8*3]); - MAC16(b1, - W7, col[8*3]); - MAC16(b2, - W1, col[8*3]); - MAC16(b3, - W5, col[8*3]); - - if(col[8*4]){ - a0 += + W4*col[8*4]; - a1 += - W4*col[8*4]; - a2 += - W4*col[8*4]; - a3 += + W4*col[8*4]; - } - - if (col[8*5]) { - MAC16(b0, + W5, col[8*5]); - MAC16(b1, - W1, col[8*5]); - MAC16(b2, + W7, col[8*5]); - MAC16(b3, + W3, col[8*5]); - } - - if(col[8*6]){ - a0 += + W6*col[8*6]; - a1 += - W2*col[8*6]; - a2 += + W2*col[8*6]; - a3 += - W6*col[8*6]; - } - - if (col[8*7]) { - MAC16(b0, + W7, col[8*7]); - MAC16(b1, - W5, col[8*7]); - MAC16(b2, + W3, col[8*7]); - MAC16(b3, - W1, col[8*7]); - } - - col[0 ] = ((a0 + b0) >> COL_SHIFT); - col[8 ] = ((a1 + b1) >> COL_SHIFT); - col[16] = ((a2 + b2) >> COL_SHIFT); - col[24] = ((a3 + b3) >> COL_SHIFT); - col[32] = ((a3 - b3) >> COL_SHIFT); - col[40] = ((a2 - b2) >> COL_SHIFT); - col[48] = ((a1 - b1) >> COL_SHIFT); - col[56] = ((a0 - b0) >> COL_SHIFT); -} - -void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block) -{ - int i; - for(i=0; i<8; i++) - idctRowCondDC(block + i*8); - - for(i=0; i<8; i++) - idctSparseColPut(dest + i, line_size, block + i); -} - -void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block) -{ - int i; - for(i=0; i<8; i++) - idctRowCondDC(block + i*8); - - for(i=0; i<8; i++) - idctSparseColAdd(dest + i, line_size, block + i); -} - -void ff_simple_idct(DCTELEM *block) -{ - int i; - for(i=0; i<8; i++) - idctRowCondDC(block + i*8); - - for(i=0; i<8; i++) - idctSparseCol(block + i); -} +#define BIT_DEPTH 10 +#include "simple_idct_template.c" +#undef BIT_DEPTH /* 2x4x8 idct */ @@ -428,7 +108,7 @@ void ff_simple_idct248_put(uint8_t 
*dest, int line_size, DCTELEM *block) /* IDCT8 on each line */ for(i=0; i<8; i++) { - idctRowCondDC(block + i*8); + idctRowCondDC_8(block + i*8); } /* IDCT4 and store */ @@ -503,7 +183,7 @@ void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block) /* IDCT8 on each line */ for(i=0; i<4; i++) { - idctRowCondDC(block + i*8); + idctRowCondDC_8(block + i*8); } /* IDCT4 and store */ @@ -523,7 +203,7 @@ void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block) /* IDCT8 and store */ for(i=0; i<4; i++){ - idctSparseColAdd(dest + i, line_size, block + i); + idctSparseColAdd_8(dest + i, line_size, block + i); } } diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h index 23bae9c2fe..a33eb964ce 100644 --- a/libavcodec/simple_idct.h +++ b/libavcodec/simple_idct.h @@ -31,12 +31,17 @@ #include #include "dsputil.h" -void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block); -void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block); +void ff_simple_idct_put_8(uint8_t *dest, int line_size, DCTELEM *block); +void ff_simple_idct_add_8(uint8_t *dest, int line_size, DCTELEM *block); +void ff_simple_idct_8(DCTELEM *block); + +void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block); +void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block); +void ff_simple_idct_10(DCTELEM *block); + void ff_simple_idct_mmx(int16_t *block); void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block); void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block); -void ff_simple_idct(DCTELEM *block); void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block); diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c new file mode 100644 index 0000000000..be49cb9570 --- /dev/null +++ b/libavcodec/simple_idct_template.c @@ -0,0 +1,401 @@ +/* + * Simple IDCT + * + * Copyright (c) 2001 Michael Niedermayer + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * simpleidct in C. + */ + +/* + based upon some outcommented c code from mpeg2dec (idct_mmx.c + written by Aaron Holtzman ) + */ + +#include "bit_depth_template.c" + +#undef W1 +#undef W2 +#undef W3 +#undef W4 +#undef W5 +#undef W6 +#undef W7 +#undef ROW_SHIFT +#undef COL_SHIFT +#undef DC_SHIFT +#undef MUL +#undef MAC + +#if BIT_DEPTH == 8 + +#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + +#define ROW_SHIFT 11 +#define COL_SHIFT 20 +#define DC_SHIFT 3 + +#define MUL(a, b) MUL16(a, b) +#define MAC(a, b, c) MAC16(a, b, c) + +#elif BIT_DEPTH == 10 + +#define W1 90901 +#define W2 85627 +#define W3 77062 +#define W4 65535 +#define W5 51491 +#define W6 35468 +#define W7 18081 + +#define ROW_SHIFT 15 +#define COL_SHIFT 20 +#define DC_SHIFT 1 + +#define MUL(a, b) ((a) * (b)) +#define MAC(a, b, c) ((a) += (b) * (c)) + +#else + +#error "Unsupported bitdepth" + +#endif + +static inline 
void FUNC(idctRowCondDC)(DCTELEM *row) +{ + int a0, a1, a2, a3, b0, b1, b2, b3; + +#if HAVE_FAST_64BIT +#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) + if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { + uint64_t temp = (row[0] << DC_SHIFT) & 0xffff; + temp += temp << 16; + temp += temp << 32; + ((uint64_t *)row)[0] = temp; + ((uint64_t *)row)[1] = temp; + return; + } +#else + if (!(((uint32_t*)row)[1] | + ((uint32_t*)row)[2] | + ((uint32_t*)row)[3] | + row[1])) { + uint32_t temp = (row[0] << DC_SHIFT) & 0xffff; + temp += temp << 16; + ((uint32_t*)row)[0]=((uint32_t*)row)[1] = + ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; + return; + } +#endif + + a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); + a1 = a0; + a2 = a0; + a3 = a0; + + /* no need to optimize : gcc does it */ + a0 += W2 * row[2]; + a1 += W6 * row[2]; + a2 -= W6 * row[2]; + a3 -= W2 * row[2]; + + b0 = MUL(W1, row[1]); + MAC(b0, W3, row[3]); + b1 = MUL(W3, row[1]); + MAC(b1, -W7, row[3]); + b2 = MUL(W5, row[1]); + MAC(b2, -W1, row[3]); + b3 = MUL(W7, row[1]); + MAC(b3, -W5, row[3]); + + if (AV_RN64A(row + 4)) { + a0 += W4*row[4] + W6*row[6]; + a1 += - W4*row[4] - W2*row[6]; + a2 += - W4*row[4] + W2*row[6]; + a3 += W4*row[4] - W6*row[6]; + + MAC(b0, W5, row[5]); + MAC(b0, W7, row[7]); + + MAC(b1, -W1, row[5]); + MAC(b1, -W5, row[7]); + + MAC(b2, W7, row[5]); + MAC(b2, W3, row[7]); + + MAC(b3, W3, row[5]); + MAC(b3, -W1, row[7]); + } + + row[0] = (a0 + b0) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; +} + +static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, + DCTELEM *col) +{ + int a0, a1, a2, a3, b0, b1, b2, b3; + INIT_CLIP; + + /* XXX: I did that only to give same values as previous code */ + a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); + a1 = 
a0; + a2 = a0; + a3 = a0; + + a0 += + W2*col[8*2]; + a1 += + W6*col[8*2]; + a2 += - W6*col[8*2]; + a3 += - W2*col[8*2]; + + b0 = MUL(W1, col[8*1]); + b1 = MUL(W3, col[8*1]); + b2 = MUL(W5, col[8*1]); + b3 = MUL(W7, col[8*1]); + + MAC(b0, + W3, col[8*3]); + MAC(b1, - W7, col[8*3]); + MAC(b2, - W1, col[8*3]); + MAC(b3, - W5, col[8*3]); + + if(col[8*4]){ + a0 += + W4*col[8*4]; + a1 += - W4*col[8*4]; + a2 += - W4*col[8*4]; + a3 += + W4*col[8*4]; + } + + if (col[8*5]) { + MAC(b0, + W5, col[8*5]); + MAC(b1, - W1, col[8*5]); + MAC(b2, + W7, col[8*5]); + MAC(b3, + W3, col[8*5]); + } + + if(col[8*6]){ + a0 += + W6*col[8*6]; + a1 += - W2*col[8*6]; + a2 += + W2*col[8*6]; + a3 += - W6*col[8*6]; + } + + if (col[8*7]) { + MAC(b0, + W7, col[8*7]); + MAC(b1, - W5, col[8*7]); + MAC(b2, + W3, col[8*7]); + MAC(b3, - W1, col[8*7]); + } + + dest[0] = CLIP((a0 + b0) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a1 + b1) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a2 + b2) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a3 + b3) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a3 - b3) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a2 - b2) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a1 - b1) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a0 - b0) >> COL_SHIFT); +} + +static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, + DCTELEM *col) +{ + int a0, a1, a2, a3, b0, b1, b2, b3; + INIT_CLIP; + + /* XXX: I did that only to give same values as previous code */ + a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); + a1 = a0; + a2 = a0; + a3 = a0; + + a0 += + W2*col[8*2]; + a1 += + W6*col[8*2]; + a2 += - W6*col[8*2]; + a3 += - W2*col[8*2]; + + b0 = MUL(W1, col[8*1]); + b1 = MUL(W3, col[8*1]); + b2 = MUL(W5, col[8*1]); + b3 = MUL(W7, col[8*1]); + + MAC(b0, + W3, col[8*3]); + MAC(b1, - W7, col[8*3]); + MAC(b2, - W1, col[8*3]); + MAC(b3, - W5, col[8*3]); + + if(col[8*4]){ + a0 += + W4*col[8*4]; + a1 += - W4*col[8*4]; + a2 += - 
W4*col[8*4]; + a3 += + W4*col[8*4]; + } + + if (col[8*5]) { + MAC(b0, + W5, col[8*5]); + MAC(b1, - W1, col[8*5]); + MAC(b2, + W7, col[8*5]); + MAC(b3, + W3, col[8*5]); + } + + if(col[8*6]){ + a0 += + W6*col[8*6]; + a1 += - W2*col[8*6]; + a2 += + W2*col[8*6]; + a3 += - W6*col[8*6]; + } + + if (col[8*7]) { + MAC(b0, + W7, col[8*7]); + MAC(b1, - W5, col[8*7]); + MAC(b2, + W3, col[8*7]); + MAC(b3, - W1, col[8*7]); + } + + dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); +} + +static inline void FUNC(idctSparseCol)(DCTELEM *col) +{ + int a0, a1, a2, a3, b0, b1, b2, b3; + + /* XXX: I did that only to give same values as previous code */ + a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); + a1 = a0; + a2 = a0; + a3 = a0; + + a0 += + W2*col[8*2]; + a1 += + W6*col[8*2]; + a2 += - W6*col[8*2]; + a3 += - W2*col[8*2]; + + b0 = MUL(W1, col[8*1]); + b1 = MUL(W3, col[8*1]); + b2 = MUL(W5, col[8*1]); + b3 = MUL(W7, col[8*1]); + + MAC(b0, + W3, col[8*3]); + MAC(b1, - W7, col[8*3]); + MAC(b2, - W1, col[8*3]); + MAC(b3, - W5, col[8*3]); + + if(col[8*4]){ + a0 += + W4*col[8*4]; + a1 += - W4*col[8*4]; + a2 += - W4*col[8*4]; + a3 += + W4*col[8*4]; + } + + if (col[8*5]) { + MAC(b0, + W5, col[8*5]); + MAC(b1, - W1, col[8*5]); + MAC(b2, + W7, col[8*5]); + MAC(b3, + W3, col[8*5]); + } + + if(col[8*6]){ + a0 += + W6*col[8*6]; + a1 += - W2*col[8*6]; + a2 += + W2*col[8*6]; + a3 += - W6*col[8*6]; + } + + if (col[8*7]) { + MAC(b0, + W7, col[8*7]); + MAC(b1, - W5, col[8*7]); + 
MAC(b2, + W3, col[8*7]); + MAC(b3, - W1, col[8*7]); + } + + col[0 ] = ((a0 + b0) >> COL_SHIFT); + col[8 ] = ((a1 + b1) >> COL_SHIFT); + col[16] = ((a2 + b2) >> COL_SHIFT); + col[24] = ((a3 + b3) >> COL_SHIFT); + col[32] = ((a3 - b3) >> COL_SHIFT); + col[40] = ((a2 - b2) >> COL_SHIFT); + col[48] = ((a1 - b1) >> COL_SHIFT); + col[56] = ((a0 - b0) >> COL_SHIFT); +} + +void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block) +{ + pixel *dest = (pixel *)dest_; + int i; + for(i=0; i<8; i++) + FUNC(idctRowCondDC)(block + i*8); + + for(i=0; i<8; i++) + FUNC(idctSparseColPut)(dest + i, line_size, block + i); +} + +void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block) +{ + pixel *dest = (pixel *)dest_; + int i; + for(i=0; i<8; i++) + FUNC(idctRowCondDC)(block + i*8); + + for(i=0; i<8; i++) + FUNC(idctSparseColAdd)(dest + i, line_size, block + i); +} + +void FUNC(ff_simple_idct)(DCTELEM *block) +{ + int i; + for(i=0; i<8; i++) + FUNC(idctRowCondDC)(block + i*8); + + for(i=0; i<8; i++) + FUNC(idctSparseCol)(block + i); +} diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c index ab9258b2b9..9811b2622e 100644 --- a/libavcodec/sparc/dsputil_vis.c +++ b/libavcodec/sparc/dsputil_vis.c @@ -3956,7 +3956,8 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (accel & ACCEL_SPARC_VIS) { - if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){ + if (avctx->bits_per_raw_sample <= 8 && + avctx->idct_algo == FF_IDCT_SIMPLEVIS) { c->idct_put = ff_simple_idct_put_vis; c->idct_add = ff_simple_idct_add_vis; c->idct = ff_simple_idct_vis; diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index 32869b97d1..1f83dd27b8 100644 --- a/libavcodec/vc1.c +++ b/libavcodec/vc1.c @@ -337,11 +337,11 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte v->res_fasttx = get_bits1(gb); if (!v->res_fasttx) { - 
v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct; + v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct_8; v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; - v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add; + v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_8; v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add; v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add; v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add; diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 29fff276ad..84ae47b04d 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2465,7 +2465,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) if (mm_flags & AV_CPU_FLAG_MMX) { const int idct_algo= avctx->idct_algo; - if(avctx->lowres==0){ + if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) { if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ c->idct_put= ff_simple_idct_put_mmx; c->idct_add= ff_simple_idct_add_mmx; From d9a9f50a3683b577e9c391ce8cab9edc9b239fcb Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Mon, 18 Jul 2011 17:48:45 -0700 Subject: [PATCH 02/18] dsputil: Replace a LONG_MAX check with HAVE_FAST_64BIT. --- libavcodec/dsputil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 4b26f68387..457495f07e 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -185,7 +185,7 @@ static int pix_norm1_c(uint8_t * pix, int line_size) s += sq[pix[6]]; s += sq[pix[7]]; #else -#if LONG_MAX > 2147483647 +#if HAVE_FAST_64BIT register uint64_t x=*(uint64_t*)pix; s += sq[x&0xff]; s += sq[(x>>8)&0xff]; From e4b50572b7bae9edd0374f2ef918e6c68a4da018 Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Mon, 18 Jul 2011 17:49:23 -0700 Subject: [PATCH 03/18] dsputil: Remove extra blank line at end. 
--- libavcodec/dsputil.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 457495f07e..fa9b591fad 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3266,4 +3266,3 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); } } - From 80469eafb747018cb9d9a2547f65def715d073b2 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Wed, 20 Jul 2011 09:24:07 -0700 Subject: [PATCH 04/18] mpegvideo: fix invalid picture unreferencing. Mpegvideo would free frames as soon as they're not the next or prev picture. This is fine for a single-threading model, but fails miserably in a system where pictures can be referenced (as e.g. last/prev pic) in other threads. Keeping track of ownership of pictures keeps image references (e.g. motion vectors, or the reference of a motion vector) alive as long as the picture data itself is alive. This also happens to fix make THREADS=[3-16] fate-vsynth[12]-error. --- libavcodec/mpegvideo.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 27dcd74a5a..d422e12bc1 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -318,7 +318,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){ s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type; if (pic->f.age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->f.age] == AV_PICTURE_TYPE_B) pic->f.age = INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway. 
- pic->owner2 = NULL; + pic->owner2 = s; return 0; fail: //for the FF_ALLOCZ_OR_GOTO macro @@ -1041,14 +1041,16 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) /* mark&release old frames */ if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->f.data[0]) { if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){ - free_frame_buffer(s, s->last_picture_ptr); + if (s->last_picture_ptr->owner2 == s) + free_frame_buffer(s, s->last_picture_ptr); /* release forgotten pictures */ /* if(mpeg124/h263) */ if(!s->encoding){ for(i=0; i<s->picture_count; i++){ - if (s->picture[i].f.data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].f.reference) { - av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n"); + if (s->picture[i].owner2 == s && s->picture[i].f.data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].f.reference) { + if (!(avctx->active_thread_type & FF_THREAD_FRAME)) + av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n"); free_frame_buffer(s, &s->picture[i]); } } From a005174b2500c180b9e7344d882af6cf79c79d8f Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 20 Jul 2011 18:37:15 +0200 Subject: [PATCH 05/18] Remove unused code under G729_BITEXACT #ifdef. G729_BITEXACT is never set, so the code is all dead code.
--- libavcodec/acelp_pitch_delay.c | 11 ----- libavcodec/celp_math.c | 85 ---------------------------------- libavcodec/lsp.c | 4 -- 3 files changed, 100 deletions(-) diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c index 2b791b5aa3..395247dd2a 100644 --- a/libavcodec/acelp_pitch_delay.c +++ b/libavcodec/acelp_pitch_delay.c @@ -105,20 +105,9 @@ int16_t ff_acelp_decode_gain_code( for(i=0; iscalarproduct_int16(fc_v, fc_v, subframe_size, 0))) >> 3) & ~0x3ff); - - mr_energy = (5439 * (mr_energy >> 15)) >> 8; // (0.15) = (0.15) * (7.23) - - return bidir_sal( - ((ff_exp2(mr_energy & 0x7fff) + 16) >> 5) * (gain_corr_factor >> 1), - (mr_energy >> 15) - 25 - ); -#else mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) / sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size, 0)); return mr_energy >> 12; -#endif } float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy, diff --git a/libavcodec/celp_math.c b/libavcodec/celp_math.c index 4ab20ad29f..8d36d4e900 100644 --- a/libavcodec/celp_math.c +++ b/libavcodec/celp_math.c @@ -27,82 +27,6 @@ #include "avcodec.h" #include "celp_math.h" -#ifdef G729_BITEXACT -/** - * Cosine table: base_cos[i] = (1<<15) * cos(i*PI/64) - */ -static const int16_t base_cos[64] = -{ - 32767, 32729, 32610, 32413, 32138, 31786, 31357, 30853, - 30274, 29622, 28899, 28106, 27246, 26320, 25330, 24279, - 23170, 22006, 20788, 19520, 18205, 16846, 15447, 14010, - 12540, 11039, 9512, 7962, 6393, 4808, 3212, 1608, - 0, -1608, -3212, -4808, -6393, -7962, -9512, -11039, - -12540, -14010, -15447, -16846, -18205, -19520, -20788, -22006, - -23170, -24279, -25330, -26320, -27246, -28106, -28899, -29622, - -30274, -30853, -31357, -31786, -32138, -32413, -32610, -32729 -}; - -/** - * Slope used to compute cos(x) - * - * cos(ind*64+offset) = base_cos[ind]+offset*slope_cos[ind] - * values multiplied by 1<<19 - */ -static const int16_t slope_cos[64] = -{ - -632, -1893, -3150, -4399, -5638, -6863, -8072, 
-9261, - -10428, -11570, -12684, -13767, -14817, -15832, -16808, -17744, - -18637, -19486, -20287, -21039, -21741, -22390, -22986, -23526, - -24009, -24435, -24801, -25108, -25354, -25540, -25664, -25726, - -25726, -25664, -25540, -25354, -25108, -24801, -24435, -24009, - -23526, -22986, -22390, -21741, -21039, -20287, -19486, -18637, - -17744, -16808, -15832, -14817, -13767, -12684, -11570, -10428, - -9261, -8072, -6863, -5638, -4399, -3150, -1893, -632 -}; - -/** - * Table used to compute exp2(x) - * - * tab_exp2[i] = (1<<14) * exp2(i/32) = 2^(i/32) i=0..32 - */ -static const uint16_t tab_exp2[33] = -{ - 16384, 16743, 17109, 17484, 17867, 18258, 18658, 19066, 19484, 19911, - 20347, 20792, 21247, 21713, 22188, 22674, 23170, 23678, 24196, 24726, - 25268, 25821, 26386, 26964, 27554, 28158, 28774, 29405, 30048, 30706, - 31379, 32066, 32767 -}; - -int16_t ff_cos(uint16_t arg) -{ - uint8_t offset= arg; - uint8_t ind = arg >> 8; - - assert(arg < 0x4000); - - return FFMAX(base_cos[ind] + ((slope_cos[ind] * offset) >> 12), -0x8000); -} - -int ff_exp2(uint16_t power) -{ - uint16_t frac_x0; - uint16_t frac_dx; - int result; - - assert(power <= 0x7fff); - - frac_x0 = power >> 10; - frac_dx = (power & 0x03ff) << 5; - - result = tab_exp2[frac_x0] << 15; - result += frac_dx * (tab_exp2[frac_x0+1] - tab_exp2[frac_x0]); - - return result >> 10; -} - -#else // G729_BITEXACT - /** * Cosine table: base_cos[i] = (1<<15) * cos(i*PI/64) */ @@ -154,8 +78,6 @@ int ff_exp2(uint16_t power) return result + ((result*(power&31)*89)>>22); } -#endif // else G729_BITEXACT - /** * Table used to compute log2(x) * @@ -163,17 +85,10 @@ int ff_exp2(uint16_t power) */ static const uint16_t tab_log2[33] = { -#ifdef G729_BITEXACT - 0, 1455, 2866, 4236, 5568, 6863, 8124, 9352, - 10549, 11716, 12855, 13967, 15054, 16117, 17156, 18172, - 19167, 20142, 21097, 22033, 22951, 23852, 24735, 25603, - 26455, 27291, 28113, 28922, 29716, 30497, 31266, 32023, 32767, -#else 4, 1459, 2870, 4240, 5572, 6867, 8127, 
9355, 10552, 11719, 12858, 13971, 15057, 16120, 17158, 18175, 19170, 20145, 21100, 22036, 22954, 23854, 24738, 25605, 26457, 27294, 28116, 28924, 29719, 30500, 31269, 32025, 32769, -#endif }; int ff_log2(uint32_t value) diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c index 1b4afa22e8..834346bc25 100644 --- a/libavcodec/lsp.c +++ b/libavcodec/lsp.c @@ -150,11 +150,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd /* LSP values for first subframe (3.2.5 of G.729, Equation 24)*/ for(i=0; i<lp_order; i++) -#ifdef G729_BITEXACT - lsp_1st[i] = (lsp_2nd[i] >> 1) + (lsp_prev[i] >> 1); -#else lsp_1st[i] = (lsp_2nd[i] + lsp_prev[i]) >> 1; -#endif ff_acelp_lsp2lpc(lp_1st, lsp_1st, lp_order >> 1); From 79a9aab56f06b870bfb8090e8e29fabdd0cfc6df Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 15 Jul 2011 02:37:33 +0200 Subject: [PATCH 06/18] Remove unused and non-compiling vestigial g729 decoder --- libavcodec/acelp_vectors.c | 20 --- libavcodec/acelp_vectors.h | 31 ---- libavcodec/amrnbdec.c | 2 +- libavcodec/g729data.h | 278 ------------------------------- libavcodec/g729dec.c | 331 ------------------------------------- 5 files changed, 1 insertion(+), 661 deletions(-) delete mode 100644 libavcodec/g729data.h delete mode 100644 libavcodec/g729dec.c diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c index 25a6ff27df..b7c05e743f 100644 --- a/libavcodec/acelp_vectors.c +++ b/libavcodec/acelp_vectors.c @@ -48,26 +48,6 @@ const uint8_t ff_fc_2pulses_9bits_track1_gray[16] = 28, 26, }; -const uint8_t ff_fc_2pulses_9bits_track2_gray[32] = -{ - 0, 2, - 5, 4, - 12, 10, - 7, 9, - 25, 24, - 20, 22, - 14, 15, - 19, 17, - 36, 31, - 21, 26, - 1, 6, - 16, 11, - 27, 29, - 32, 30, - 39, 37, - 34, 35, -}; - const uint8_t ff_fc_4pulses_8bits_tracks_13[16] = { 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, diff --git a/libavcodec/acelp_vectors.h b/libavcodec/acelp_vectors.h index 07d1000d3f..2c5d319e45 100644 --- a/libavcodec/acelp_vectors.h +++ b/libavcodec/acelp_vectors.h @@ -81,37
+81,6 @@ extern const uint8_t ff_fc_4pulses_8bits_track_4[32]; extern const uint8_t ff_fc_2pulses_9bits_track1[16]; extern const uint8_t ff_fc_2pulses_9bits_track1_gray[16]; -/** - * Track|Pulse| Positions - * ----------------------------------------- - * 2 | 1 | 0, 7, 14, 20, 27, 34, 1, 21 - * | | 2, 9, 15, 22, 29, 35, 6, 26 - * | | 4,10, 17, 24, 30, 37, 11, 31 - * | | 5,12, 19, 25, 32, 39, 16, 36 - * ----------------------------------------- - * - * @remark Track in the table should be read top-to-bottom, left-to-right. - * - * @note (EE.1) This table (from the reference code) does not comply with - * the specification. - * The specification contains the following table: - * - * Track|Pulse| Positions - * ----------------------------------------- - * 2 | 1 | 0, 5, 10, 15, 20, 25, 30, 35 - * | | 1, 6, 11, 16, 21, 26, 31, 36 - * | | 2, 7, 12, 17, 22, 27, 32, 37 - * | | 4, 9, 14, 19, 24, 29, 34, 39 - * - * ----------------------------------------- - * - * @note (EE.2) Reference G.729D code also uses gray decoding for each - * pulse index before looking up the value in the table. - * - * Used in G.729 @@6.4k (with gray coding) - */ -extern const uint8_t ff_fc_2pulses_9bits_track2_gray[32]; - /** * b60 hamming windowed sinc function coefficients */ diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c index 1c90aadb12..501b137780 100644 --- a/libavcodec/amrnbdec.c +++ b/libavcodec/amrnbdec.c @@ -83,7 +83,7 @@ /** Maximum sharpening factor * * The specification says 0.8, which should be 13107, but the reference C code - * uses 13017 instead. (Amusingly the same applies to SHARP_MAX in g729dec.c.) + * uses 13017 instead. (Amusingly the same applies to SHARP_MAX in bitexact G.729.) 
*/ #define SHARP_MAX 0.79449462890625 diff --git a/libavcodec/g729data.h b/libavcodec/g729data.h deleted file mode 100644 index 48010a7683..0000000000 --- a/libavcodec/g729data.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * data for G.729 decoder - * Copyright (c) 2007 Vladimir Voroshilov - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_G729DATA_H -#define AVCODEC_G729DATA_H - -#include - -#define MA_NP 4 ///< Moving Average (MA) prediction order - -#define VQ_1ST_BITS 7 ///< first stage vector of quantizer (size in bits) -#define VQ_2ND_BITS 5 ///< second stage vector of quantizer (size in bits) - -#define GC_1ST_IDX_BITS_8K 3 ///< gain codebook (first stage) index, 8k mode (size in bits) -#define GC_2ND_IDX_BITS_8K 4 ///< gain codebook (second stage) index, 8k mode (size in bits) - -#define GC_1ST_IDX_BITS_6K4 3 ///< gain codebook (first stage) index, 6.4k mode (size in bits) -#define GC_2ND_IDX_BITS_6K4 3 ///< gain codebook (second stage) index, 6.4k mode (size in bits) - -/** - * first stage LSP codebook - * (10-dimensional, with 128 entries (3.24 of G.729) - */ -static const int16_t cb_lsp_1st[1< -#include -#include -#include -#include -#include -#include - -#include "avcodec.h" -#include "libavutil/avutil.h" -#include "get_bits.h" - 
-#include "lsp.h" -#include "celp_math.h" -#include "acelp_filters.h" -#include "acelp_pitch_delay.h" -#include "acelp_vectors.h" -#include "g729data.h" - -/** - * minimum quantized LSF value (3.2.4) - * 0.005 in Q13 - */ -#define LSFQ_MIN 40 - -/** - * maximum quantized LSF value (3.2.4) - * 3.135 in Q13 - */ -#define LSFQ_MAX 25681 - -/** - * minimum LSF distance (3.2.4) - * 0.0391 in Q13 - */ -#define LSFQ_DIFF_MIN 321 - -/** - * minimum gain pitch value (3.8, Equation 47) - * 0.2 in (1.14) - */ -#define SHARP_MIN 3277 - -/** - * maximum gain pitch value (3.8, Equation 47) - * (EE) This does not comply with the specification. - * Specification says about 0.8, which should be - * 13107 in (1.14), but reference C code uses - * 13017 (equals to 0.7945) instead of it. - */ -#define SHARP_MAX 13017 - -/** - * subframe size - */ -#define SUBFRAME_SIZE 40 - - -typedef struct { - uint8_t ac_index_bits[2]; ///< adaptive codebook index for second subframe (size in bits) - uint8_t parity_bit; ///< parity bit for pitch delay - uint8_t gc_1st_index_bits; ///< gain codebook (first stage) index (size in bits) - uint8_t gc_2nd_index_bits; ///< gain codebook (second stage) index (size in bits) - uint8_t fc_signs_bits; ///< number of pulses in fixed-codebook vector - uint8_t fc_indexes_bits; ///< size (in bits) of fixed-codebook index entry -} G729FormatDescription; - -typedef struct { - int pitch_delay_int_prev; ///< integer part of previous subframe's pitch delay (4.1.3) - - /// (2.13) LSP quantizer outputs - int16_t past_quantizer_output_buf[MA_NP + 1][10]; - int16_t* past_quantizer_outputs[MA_NP + 1]; - - int16_t lsfq[10]; ///< (2.13) quantized LSF coefficients from previous frame - int16_t lsp_buf[2][10]; ///< (0.15) LSP coefficients (previous and current frames) (3.2.5) - int16_t *lsp[2]; ///< pointers to lsp_buf -} G729Context; - -static const G729FormatDescription format_g729_8k = { - .ac_index_bits = {8,5}, - .parity_bit = 1, - .gc_1st_index_bits = GC_1ST_IDX_BITS_8K, - 
.gc_2nd_index_bits = GC_2ND_IDX_BITS_8K, - .fc_signs_bits = 4, - .fc_indexes_bits = 13, -}; - -static const G729FormatDescription format_g729d_6k4 = { - .ac_index_bits = {8,4}, - .parity_bit = 0, - .gc_1st_index_bits = GC_1ST_IDX_BITS_6K4, - .gc_2nd_index_bits = GC_2ND_IDX_BITS_6K4, - .fc_signs_bits = 2, - .fc_indexes_bits = 9, -}; - -/** - * @brief pseudo random number generator - */ -static inline uint16_t g729_prng(uint16_t value) -{ - return 31821 * value + 13849; -} - -/** - * Get parity bit of bit 2..7 - */ -static inline int get_parity(uint8_t value) -{ - return (0x6996966996696996ULL >> (value >> 2)) & 1; -} - -static void lsf_decode(int16_t* lsfq, int16_t* past_quantizer_outputs[MA_NP + 1], - int16_t ma_predictor, - int16_t vq_1st, int16_t vq_2nd_low, int16_t vq_2nd_high) -{ - int i,j; - static const uint8_t min_distance[2]={10, 5}; //(2.13) - int16_t* quantizer_output = past_quantizer_outputs[MA_NP]; - - for (i = 0; i < 5; i++) { - quantizer_output[i] = cb_lsp_1st[vq_1st][i ] + cb_lsp_2nd[vq_2nd_low ][i ]; - quantizer_output[i + 5] = cb_lsp_1st[vq_1st][i + 5] + cb_lsp_2nd[vq_2nd_high][i + 5]; - } - - for (j = 0; j < 2; j++) { - for (i = 1; i < 10; i++) { - int diff = (quantizer_output[i - 1] - quantizer_output[i] + min_distance[j]) >> 1; - if (diff > 0) { - quantizer_output[i - 1] -= diff; - quantizer_output[i ] += diff; - } - } - } - - for (i = 0; i < 10; i++) { - int sum = quantizer_output[i] * cb_ma_predictor_sum[ma_predictor][i]; - for (j = 0; j < MA_NP; j++) - sum += past_quantizer_outputs[j][i] * cb_ma_predictor[ma_predictor][j][i]; - - lsfq[i] = sum >> 15; - } - - /* Rotate past_quantizer_outputs. 
*/ - memmove(past_quantizer_outputs + 1, past_quantizer_outputs, MA_NP * sizeof(int16_t*)); - past_quantizer_outputs[0] = quantizer_output; - - ff_acelp_reorder_lsf(lsfq, LSFQ_DIFF_MIN, LSFQ_MIN, LSFQ_MAX, 10); -} - -static av_cold int decoder_init(AVCodecContext * avctx) -{ - G729Context* ctx = avctx->priv_data; - int i,k; - - if (avctx->channels != 1) { - av_log(avctx, AV_LOG_ERROR, "Only mono sound is supported (requested channels: %d).\n", avctx->channels); - return AVERROR(EINVAL); - } - - /* Both 8kbit/s and 6.4kbit/s modes uses two subframes per frame. */ - avctx->frame_size = SUBFRAME_SIZE << 1; - - for (k = 0; k < MA_NP + 1; k++) { - ctx->past_quantizer_outputs[k] = ctx->past_quantizer_output_buf[k]; - for (i = 1; i < 11; i++) - ctx->past_quantizer_outputs[k][i - 1] = (18717 * i) >> 3; - } - - ctx->lsp[0] = ctx->lsp_buf[0]; - ctx->lsp[1] = ctx->lsp_buf[1]; - memcpy(ctx->lsp[0], lsp_init, 10 * sizeof(int16_t)); - - return 0; -} - -static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, - AVPacket *avpkt) -{ - const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - int16_t *out_frame = data; - GetBitContext gb; - G729FormatDescription format; - int frame_erasure = 0; ///< frame erasure detected during decoding - int bad_pitch = 0; ///< parity check failed - int i; - G729Context *ctx = avctx->priv_data; - int16_t lp[2][11]; // (3.12) - uint8_t ma_predictor; ///< switched MA predictor of LSP quantizer - uint8_t quantizer_1st; ///< first stage vector of quantizer - uint8_t quantizer_2nd_lo; ///< second stage lower vector of quantizer (size in bits) - uint8_t quantizer_2nd_hi; ///< second stage higher vector of quantizer (size in bits) - - int pitch_delay_int; // pitch delay, integer part - int pitch_delay_3x; // pitch delay, multiplied by 3 - - if (*data_size < SUBFRAME_SIZE << 2) { - av_log(avctx, AV_LOG_ERROR, "Error processing packet: output buffer too small\n"); - return AVERROR(EIO); - } - - if (buf_size == 10) { - format = 
format_g729_8k; - av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729 @ 8kbit/s"); - } else if (buf_size == 8) { - format = format_g729d_6k4; - av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729D @ 6.4kbit/s"); - } else { - av_log(avctx, AV_LOG_ERROR, "Packet size %d is unknown.\n", buf_size); - return AVERROR_INVALIDDATA; - } - - for (i=0; i < buf_size; i++) - frame_erasure |= buf[i]; - frame_erasure = !frame_erasure; - - init_get_bits(&gb, buf, buf_size); - - ma_predictor = get_bits(&gb, 1); - quantizer_1st = get_bits(&gb, VQ_1ST_BITS); - quantizer_2nd_lo = get_bits(&gb, VQ_2ND_BITS); - quantizer_2nd_hi = get_bits(&gb, VQ_2ND_BITS); - - lsf_decode(ctx->lsfq, ctx->past_quantizer_outputs, - ma_predictor, - quantizer_1st, quantizer_2nd_lo, quantizer_2nd_hi); - - ff_acelp_lsf2lsp(ctx->lsp[1], ctx->lsfq, 10); - - ff_acelp_lp_decode(&lp[0][0], &lp[1][0], ctx->lsp[1], ctx->lsp[0], 10); - - FFSWAP(int16_t*, ctx->lsp[1], ctx->lsp[0]); - - for (i = 0; i < 2; i++) { - uint8_t ac_index; ///< adaptive codebook index - uint8_t pulses_signs; ///< fixed-codebook vector pulse signs - int fc_indexes; ///< fixed-codebook indexes - uint8_t gc_1st_index; ///< gain codebook (first stage) index - uint8_t gc_2nd_index; ///< gain codebook (second stage) index - - ac_index = get_bits(&gb, format.ac_index_bits[i]); - if(!i && format.parity_bit) - bad_pitch = get_parity(ac_index) == get_bits1(&gb); - fc_indexes = get_bits(&gb, format.fc_indexes_bits); - pulses_signs = get_bits(&gb, format.fc_signs_bits); - gc_1st_index = get_bits(&gb, format.gc_1st_index_bits); - gc_2nd_index = get_bits(&gb, format.gc_2nd_index_bits); - - if(!i) { - if (bad_pitch) - pitch_delay_3x = 3 * ctx->pitch_delay_int_prev; - else - pitch_delay_3x = ff_acelp_decode_8bit_to_1st_delay3(ac_index); - } else { - int pitch_delay_min = av_clip(ctx->pitch_delay_int_prev - 5, - PITCH_DELAY_MIN, PITCH_DELAY_MAX - 9); - - if(packet_type == FORMAT_G729D_6K4) - pitch_delay_3x = ff_acelp_decode_4bit_to_2nd_delay3(ac_index, 
pitch_delay_min); - else - pitch_delay_3x = ff_acelp_decode_5_6_bit_to_2nd_delay3(ac_index, pitch_delay_min); - } - - /* Round pitch delay to nearest (used everywhere except ff_acelp_interpolate). */ - pitch_delay_int = (pitch_delay_3x + 1) / 3; - - ff_acelp_weighted_vector_sum(fc + pitch_delay_int, - fc + pitch_delay_int, - fc, 1 << 14, - av_clip(ctx->gain_pitch, SHARP_MIN, SHARP_MAX), - 0, 14, - SUBFRAME_SIZE - pitch_delay_int); - - if (frame_erasure) { - ctx->gain_pitch = (29491 * ctx->gain_pitch) >> 15; // 0.90 (0.15) - ctx->gain_code = ( 2007 * ctx->gain_code ) >> 11; // 0.98 (0.11) - - gain_corr_factor = 0; - } else { - ctx->gain_pitch = cb_gain_1st_8k[gc_1st_index][0] + - cb_gain_2nd_8k[gc_2nd_index][0]; - gain_corr_factor = cb_gain_1st_8k[gc_1st_index][1] + - cb_gain_2nd_8k[gc_2nd_index][1]; - - ff_acelp_weighted_vector_sum(ctx->exc + i * SUBFRAME_SIZE, - ctx->exc + i * SUBFRAME_SIZE, fc, - (!voicing && frame_erasure) ? 0 : ctx->gain_pitch, - ( voicing && frame_erasure) ? 0 : ctx->gain_code, - 1 << 13, 14, SUBFRAME_SIZE); - - ctx->pitch_delay_int_prev = pitch_delay_int; - } - - *data_size = SUBFRAME_SIZE << 2; - return buf_size; -} - -AVCodec ff_g729_decoder = -{ - "g729", - AVMEDIA_TYPE_AUDIO, - CODEC_ID_G729, - sizeof(G729Context), - decoder_init, - NULL, - NULL, - decode_frame, - .long_name = NULL_IF_CONFIG_SMALL("G.729"), -}; From 558c268318f968b77b1cd8bc55ad4429e4c39948 Mon Sep 17 00:00:00 2001 From: Sean McGovern Date: Wed, 20 Jul 2011 17:25:41 -0400 Subject: [PATCH 07/18] configure: add -xc99 to LDFLAGS for Sun CC Using Sun's compiler on Solaris, -xc99 is as much a linker flag as a compiler flag, so add it to LDFLAGS. 
Signed-off-by: Diego Biurrun --- configure | 1 + 1 file changed, 1 insertion(+) diff --git a/configure b/configure index 28e008fa1e..3953cfc44b 100755 --- a/configure +++ b/configure @@ -2014,6 +2014,7 @@ elif $cc -V 2>&1 | grep -q Sun; then cc_ident=$($cc -V 2>&1 | head -n1 | cut -d' ' -f 2-) DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "1s,^.*: ,$@: ," -e "\$$!s,\$$, \\\," -e "1!s,^.*: , ," > $(@:.o=.d)' DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -xM1' + add_ldflags -xc99 speed_cflags='-O5' size_cflags='-O5 -xspace' filter_cflags=suncc_flags From eedb1f2034e18d4a292167aef5efea6606874a7b Mon Sep 17 00:00:00 2001 From: Joseph Artsimovich Date: Mon, 18 Jul 2011 11:22:15 +0100 Subject: [PATCH 08/18] swscale: mark YUV422P10(LE,BE) as supported for output Signed-off-by: Mans Rullgard --- libswscale/utils.c | 2 ++ tests/ref/lavfi/pixdesc | 2 ++ tests/ref/lavfi/pixfmts_copy | 2 ++ tests/ref/lavfi/pixfmts_null | 2 ++ tests/ref/lavfi/pixfmts_scale | 2 ++ tests/ref/lavfi/pixfmts_vflip | 2 ++ 6 files changed, 12 insertions(+) diff --git a/libswscale/utils.c b/libswscale/utils.c index 0bde82026d..c6abb6b446 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -176,10 +176,12 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt) || (x)==PIX_FMT_YUV420P9LE \ || (x)==PIX_FMT_YUV420P10LE \ || (x)==PIX_FMT_YUV420P16LE \ + || (x)==PIX_FMT_YUV422P10LE \ || (x)==PIX_FMT_YUV422P16LE \ || (x)==PIX_FMT_YUV444P16LE \ || (x)==PIX_FMT_YUV420P9BE \ || (x)==PIX_FMT_YUV420P10BE \ + || (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV420P16BE \ || (x)==PIX_FMT_YUV422P16BE \ || (x)==PIX_FMT_YUV444P16BE \ diff --git a/tests/ref/lavfi/pixdesc b/tests/ref/lavfi/pixdesc index a82f8ed9b6..3730988777 100644 --- a/tests/ref/lavfi/pixdesc +++ b/tests/ref/lavfi/pixdesc @@ -38,6 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a +yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 
+yuv422p10le 3f478be644add24b6cc77e718a6e2afa yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy index a82f8ed9b6..3730988777 100644 --- a/tests/ref/lavfi/pixfmts_copy +++ b/tests/ref/lavfi/pixfmts_copy @@ -38,6 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a +yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 +yuv422p10le 3f478be644add24b6cc77e718a6e2afa yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null index a82f8ed9b6..3730988777 100644 --- a/tests/ref/lavfi/pixfmts_null +++ b/tests/ref/lavfi/pixfmts_null @@ -38,6 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a +yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 +yuv422p10le 3f478be644add24b6cc77e718a6e2afa yuv422p16be dc9886f2fccf87cc54b27e071a2c251e yuv422p16le f181c8d8436f1233ba566d9bc88005ec yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale index 094e52462f..392b9ce601 100644 --- a/tests/ref/lavfi/pixfmts_scale +++ b/tests/ref/lavfi/pixfmts_scale @@ -38,6 +38,8 @@ yuv420p16le cba8b390ad5e7b8678e419b8ce79c008 yuv420p9be a073b2d93b2a7dce2069ba252bc43175 yuv420p9le b67233c3c7d93763d07d88f697c145e1 yuv422p 918e37701ee7377d16a8a6c119c56a40 +yuv422p10be 533fd21e7943c20a1026b19069b3b867 +yuv422p10le 59b20a4a8609f5da2dc54c78aea11e6c yuv422p16be 2cf502d7d386db1f1b3b946679d897b1 yuv422p16le 3002a4e47520731dcee5929aff49eb74 yuv440p 461503fdb9b90451020aa3b25ddf041c diff --git 
a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip index 8702eca796..2b62518a28 100644 --- a/tests/ref/lavfi/pixfmts_vflip +++ b/tests/ref/lavfi/pixfmts_vflip @@ -38,6 +38,8 @@ yuv420p16le 0f609e588e5a258644ef85170d70e030 yuv420p9be be40ec975fb2873891643cbbbddbc3b0 yuv420p9le 7e606310d3f5ff12badf911e8f333471 yuv422p d7f5cb44d9b0210d66d6a8762640ab34 +yuv422p10be a28b051168af49435c04af5f58dce47b +yuv422p10le 35936ffff30df2697f47b9b8d2cb7dea yuv422p16be 51d9aa4e78d121c226d919ce97976fe4 yuv422p16le 12965c54bda8932ca72da194419a9908 yuv440p 876385e96165acf51271b20e5d85a416 From 1073823984cf2938896fa30f9cc3d25640bbc19a Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Wed, 20 Jul 2011 18:38:37 +0100 Subject: [PATCH 09/18] dnxhdenc: whitespace cosmetics Signed-off-by: Mans Rullgard --- libavcodec/dnxhdenc.c | 62 ++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c index 85832a1dc3..58698a48ab 100644 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@ -52,10 +52,10 @@ static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const pixels += line_size; block += 8; } - memcpy(block , block- 8, sizeof(*block)*8); - memcpy(block+ 8, block-16, sizeof(*block)*8); - memcpy(block+16, block-24, sizeof(*block)*8); - memcpy(block+24, block-32, sizeof(*block)*8); + memcpy(block, block - 8, sizeof(*block) * 8); + memcpy(block + 8, block - 16, sizeof(*block) * 8); + memcpy(block + 16, block - 24, sizeof(*block) * 8); + memcpy(block + 24, block - 32, sizeof(*block) * 8); } static int dnxhd_init_vlc(DNXHDEncContext *ctx) @@ -64,9 +64,9 @@ static int dnxhd_init_vlc(DNXHDEncContext *ctx) int max_level = 1<<(ctx->cid_table->bit_depth+2); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes), fail); - FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits , max_level*4*sizeof(*ctx->vlc_bits ), fail); - FF_ALLOCZ_OR_GOTO(ctx->m.avctx, 
ctx->run_codes, 63*2 , fail); - FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits , 63 , fail); + FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits, max_level*4*sizeof(*ctx->vlc_bits) , fail); + FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes, 63*2, fail); + FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits, 63, fail); ctx->vlc_codes += max_level*2; ctx->vlc_bits += max_level*2; @@ -119,8 +119,8 @@ static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias) uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t* int qscale, i; - FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int) , fail); - FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int) , fail); + FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail); + FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail); @@ -218,7 +218,7 @@ static int dnxhd_encode_init(AVCodecContext *avctx) FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t), fail); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_offs, ctx->m.mb_height*sizeof(uint32_t), fail); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_bits, ctx->m.mb_num *sizeof(uint16_t), fail); - FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t) , fail); + FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t), fail); ctx->frame.key_frame = 1; ctx->frame.pict_type = AV_PICTURE_TYPE_I; @@ -341,7 +341,7 @@ static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block) int score = 0; int i; for (i = 0; i < 64; i++) - score += (block[i]-qblock[i])*(block[i]-qblock[i]); + score += (block[i] - qblock[i]) * (block[i] - 
qblock[i]); return score; } @@ -369,26 +369,28 @@ static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, in const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3); DSPContext *dsp = &ctx->m.dsp; - dsp->get_pixels(ctx->blocks[0], ptr_y , ctx->m.linesize); + dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[2], ptr_u , ctx->m.uvlinesize); - dsp->get_pixels(ctx->blocks[3], ptr_v , ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { if (ctx->interlaced) { - ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize); + ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize); - ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize); - ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize); + ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); + ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); } else { - dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]); - dsp->clear_block(ctx->blocks[6]); dsp->clear_block(ctx->blocks[7]); + dsp->clear_block(ctx->blocks[4]); + dsp->clear_block(ctx->blocks[5]); + dsp->clear_block(ctx->blocks[6]); + dsp->clear_block(ctx->blocks[7]); } } else { - dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize); + dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[6], ptr_u + 
ctx->dct_uv_offset , ctx->m.uvlinesize); - dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); } } @@ -496,14 +498,14 @@ static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx) for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) { int thread_size; ctx->slice_offs[mb_y] = offset; - ctx->slice_size[mb_y] = 0; - for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { - unsigned mb = mb_y * ctx->m.mb_width + mb_x; - ctx->slice_size[mb_y] += ctx->mb_bits[mb]; - } - ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31; - ctx->slice_size[mb_y] >>= 3; - thread_size = ctx->slice_size[mb_y]; + ctx->slice_size[mb_y] = 0; + for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { + unsigned mb = mb_y * ctx->m.mb_width + mb_x; + ctx->slice_size[mb_y] += ctx->mb_bits[mb]; + } + ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31; + ctx->slice_size[mb_y] >>= 3; + thread_size = ctx->slice_size[mb_y]; offset += thread_size; } } From b04997839786c56e5000569d0192fa629e2d3a76 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Wed, 20 Jul 2011 18:41:37 +0100 Subject: [PATCH 10/18] dnxhdenc: remove inline from function only called through pointer Signed-off-by: Mans Rullgard --- libavcodec/dnxhdenc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c index 58698a48ab..b65d0bf669 100644 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@ -41,7 +41,7 @@ static const AVClass class = { "dnxhd", av_default_item_name, options, LIBAVUTIL #define LAMBDA_FRAC_BITS 10 -static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size) +static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; for (i = 0; i < 4; i++) { From 
42c27f2ecae38abd7d6b75070998fb2f98089389 Mon Sep 17 00:00:00 2001 From: Joseph Artsimovich Date: Mon, 18 Jul 2011 11:21:33 +0100 Subject: [PATCH 11/18] dnxhd: rename some data tables Signed-off-by: Mans Rullgard --- libavcodec/dnxhddata.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c index 642b22fb5d..677fb8f5be 100644 --- a/libavcodec/dnxhddata.c +++ b/libavcodec/dnxhddata.c @@ -448,7 +448,7 @@ static const uint8_t dnxhd_1238_ac_index_flag[257] = { 1, }; -static const uint16_t dnxhd_1238_run_codes[62] = { +static const uint16_t dnxhd_1235_1238_1241_run_codes[62] = { 0, 4, 10, 11, 24, 25, 26, 27, 56, 57, 58, 59, 120, 242, 486, 487, 488, 489, 980, 981, 982, 983, 984, 985, @@ -459,7 +459,7 @@ static const uint16_t dnxhd_1238_run_codes[62] = { 1018, 1019, 1020, 1021, 1022, 1023, }; -static const uint8_t dnxhd_1238_run_bits[62] = { +static const uint8_t dnxhd_1235_1238_1241_run_bits[62] = { 1, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, @@ -473,15 +473,15 @@ static const uint8_t dnxhd_1238_run[62] = { 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, }; -static const uint8_t dnxhd_1241_dc_codes[14] = { +static const uint8_t dnxhd_1235_1241_dc_codes[14] = { 10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127, }; -static const uint8_t dnxhd_1241_dc_bits[14] = { +static const uint8_t dnxhd_1235_1241_dc_bits[14] = { 4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7, }; -static const uint16_t dnxhd_1241_ac_codes[257] = { +static const uint16_t dnxhd_1235_1241_ac_codes[257] = { 0, 1, 4, 10, 11, 24, 25, 26, 54, 55, 56, 57, 116, 117, 118, 119, 240, 241, 242, 243, 244, 245, 492, 493, @@ -517,7 +517,7 @@ static const uint16_t dnxhd_1241_ac_codes[257] = { 65535, }; -static const uint8_t dnxhd_1241_ac_bits[257] = { +static const uint8_t 
dnxhd_1235_1241_ac_bits[257] = { 2, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, @@ -537,7 +537,7 @@ static const uint8_t dnxhd_1241_ac_bits[257] = { 16, }; -static const uint8_t dnxhd_1241_ac_level[257] = { +static const uint8_t dnxhd_1235_1241_ac_level[257] = { 1, 1, 2, 3, 0, 4, 5, 2, 6, 7, 8, 3, 9, 10, 11, 4, 12, 13, 14, 15, 16, 5, 17, 18, 19, 20, 21, 6, 7, 22, 23, 24, 25, 26, 27, 28, 29, 8, 9, 30, 31, 32, 33, 34, 35, 36, 37, 38, @@ -557,7 +557,7 @@ static const uint8_t dnxhd_1241_ac_level[257] = { 64, }; -static const uint8_t dnxhd_1241_ac_run_flag[257] = { +static const uint8_t dnxhd_1235_1241_ac_run_flag[257] = { 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -577,7 +577,7 @@ static const uint8_t dnxhd_1241_ac_run_flag[257] = { 1, }; -static const uint8_t dnxhd_1241_ac_index_flag[257] = { +static const uint8_t dnxhd_1235_1241_ac_index_flag[257] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -597,7 +597,7 @@ static const uint8_t dnxhd_1241_ac_index_flag[257] = { 1, }; -static const uint8_t dnxhd_1241_run[62] = { +static const uint8_t dnxhd_1235_1241_run[62] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 17, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, @@ -890,14 +890,14 @@ const CIDEntry ff_dnxhd_cid_table[] = { dnxhd_1238_dc_codes, dnxhd_1238_dc_bits, dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level, dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag, - dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1238_run, + dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run, { 175, 185, 220, 365, 440 } }, { 1241, 1920, 1080, 1, 
917504, 458752, 6, 10, dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight, - dnxhd_1241_dc_codes, dnxhd_1241_dc_bits, - dnxhd_1241_ac_codes, dnxhd_1241_ac_bits, dnxhd_1241_ac_level, - dnxhd_1241_ac_run_flag, dnxhd_1241_ac_index_flag, - dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1241_run, + dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits, + dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level, + dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag, + dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run, { 185, 220 } }, { 1242, 1920, 1080, 1, 606208, 303104, 4, 8, dnxhd_1242_luma_weight, dnxhd_1242_chroma_weight, @@ -911,7 +911,7 @@ const CIDEntry ff_dnxhd_cid_table[] = { dnxhd_1238_dc_codes, dnxhd_1238_dc_bits, dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level, dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag, - dnxhd_1238_run_codes, dnxhd_1238_run_bits, dnxhd_1238_run, + dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run, { 185, 220 } }, { 1251, 1280, 720, 0, 458752, 458752, 4, 8, dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight, From a82beafd64529cba8c20bd24f63d6af7dccffb7d Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 21 Jul 2011 02:21:14 +0100 Subject: [PATCH 12/18] dsputil: allow 9/10-bit functions for non-h264 codecs Signed-off-by: Mans Rullgard --- libavcodec/dsputil.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index fa9b591fad..4008389a9d 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3192,21 +3192,18 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) dspfunc2(avg_h264_qpel, 1, 8, depth);\ dspfunc2(avg_h264_qpel, 2, 4, depth); - if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) { - BIT_DEPTH_FUNCS(8) - } else { - switch (avctx->bits_per_raw_sample) { - case 9: - BIT_DEPTH_FUNCS(9) - break; - case 
10: - BIT_DEPTH_FUNCS(10) - break; - default: - av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); - BIT_DEPTH_FUNCS(8) - break; - } + switch (avctx->bits_per_raw_sample) { + case 9: + BIT_DEPTH_FUNCS(9); + break; + case 10: + BIT_DEPTH_FUNCS(10); + break; + default: + av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); + case 8: + BIT_DEPTH_FUNCS(8); + break; } From e7bcc5baf721d6a80fcb79674aad4171e5ca94e7 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 21 Jul 2011 03:56:44 +0100 Subject: [PATCH 13/18] simple_idct: change 10-bit add/put stride from pixels to bytes This matches other dsputil functions and simplifies calls. Signed-off-by: Mans Rullgard --- libavcodec/simple_idct_template.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index be49cb9570..1b6115d41f 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -372,6 +372,9 @@ void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block) { pixel *dest = (pixel *)dest_; int i; + + line_size /= sizeof(pixel); + for(i=0; i<8; i++) FUNC(idctRowCondDC)(block + i*8); @@ -383,6 +386,9 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block) { pixel *dest = (pixel *)dest_; int i; + + line_size /= sizeof(pixel); + for(i=0; i<8; i++) FUNC(idctRowCondDC)(block + i*8); From 7df701a1925cc46cea5a01988cbabc1e6b3a49b1 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 21 Jul 2011 10:58:56 +0100 Subject: [PATCH 14/18] dsputil: remove huge #if 0 block Signed-off-by: Mans Rullgard --- libavcodec/dsputil_template.c | 145 ---------------------------------- 1 file changed, 145 deletions(-) diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c index 645a881a19..9f8cf557c8 100644 --- a/libavcodec/dsputil_template.c +++ b/libavcodec/dsputil_template.c @@ -230,150 +230,6 @@ 
static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int l } } -#if 0 - -#define PIXOP2(OPNAME, OP) \ -static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= 
AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8*sizeof(pixel))\ -CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8*sizeof(pixel))\ -CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8*sizeof(pixel))\ -CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8*sizeof(pixel))\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8*sizeof(pixel))\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8*sizeof(pixel))\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8*sizeof(pixel)) - -#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) -#else // 64 bit variant - #define PIXOP2(OPNAME, OP) \ static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ int i;\ @@ -749,7 +605,6 @@ CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pi CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\ #define op_avg(a, b) a = rnd_avg_pixel4(a, b) -#endif #define op_put(a, b) a = b PIXOP2(avg, op_avg) From 
a402f109599c589ba75f6d95cf071a205406d9d7 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 21 Jul 2011 09:48:29 +0100 Subject: [PATCH 15/18] simple_idct: make repeated code a macro Signed-off-by: Mans Rullgard --- libavcodec/simple_idct_template.c | 197 ++++++++---------------------- 1 file changed, 53 insertions(+), 144 deletions(-) diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index 1b6115d41f..81845c962d 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -161,60 +161,63 @@ static inline void FUNC(idctRowCondDC)(DCTELEM *row) row[4] = (a3 - b3) >> ROW_SHIFT; } +#define IDCT_COLS do { \ + a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \ + a1 = a0; \ + a2 = a0; \ + a3 = a0; \ + \ + a0 += W2*col[8*2]; \ + a1 += W6*col[8*2]; \ + a2 += -W6*col[8*2]; \ + a3 += -W2*col[8*2]; \ + \ + b0 = MUL(W1, col[8*1]); \ + b1 = MUL(W3, col[8*1]); \ + b2 = MUL(W5, col[8*1]); \ + b3 = MUL(W7, col[8*1]); \ + \ + MAC(b0, W3, col[8*3]); \ + MAC(b1, -W7, col[8*3]); \ + MAC(b2, -W1, col[8*3]); \ + MAC(b3, -W5, col[8*3]); \ + \ + if (col[8*4]) { \ + a0 += W4*col[8*4]; \ + a1 += -W4*col[8*4]; \ + a2 += -W4*col[8*4]; \ + a3 += W4*col[8*4]; \ + } \ + \ + if (col[8*5]) { \ + MAC(b0, W5, col[8*5]); \ + MAC(b1, -W1, col[8*5]); \ + MAC(b2, W7, col[8*5]); \ + MAC(b3, W3, col[8*5]); \ + } \ + \ + if (col[8*6]) { \ + a0 += W6*col[8*6]; \ + a1 += -W2*col[8*6]; \ + a2 += W2*col[8*6]; \ + a3 += -W6*col[8*6]; \ + } \ + \ + if (col[8*7]) { \ + MAC(b0, W7, col[8*7]); \ + MAC(b1, -W5, col[8*7]); \ + MAC(b2, W3, col[8*7]); \ + MAC(b3, -W1, col[8*7]); \ + } \ + } while (0) + static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, DCTELEM *col) { int a0, a1, a2, a3, b0, b1, b2, b3; INIT_CLIP; - /* XXX: I did that only to give same values as previous code */ - a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); - a1 = a0; - a2 = a0; - a3 = a0; - - a0 += + W2*col[8*2]; - a1 += + W6*col[8*2]; - a2 += - W6*col[8*2]; - a3 
+= - W2*col[8*2]; - - b0 = MUL(W1, col[8*1]); - b1 = MUL(W3, col[8*1]); - b2 = MUL(W5, col[8*1]); - b3 = MUL(W7, col[8*1]); - - MAC(b0, + W3, col[8*3]); - MAC(b1, - W7, col[8*3]); - MAC(b2, - W1, col[8*3]); - MAC(b3, - W5, col[8*3]); - - if(col[8*4]){ - a0 += + W4*col[8*4]; - a1 += - W4*col[8*4]; - a2 += - W4*col[8*4]; - a3 += + W4*col[8*4]; - } - - if (col[8*5]) { - MAC(b0, + W5, col[8*5]); - MAC(b1, - W1, col[8*5]); - MAC(b2, + W7, col[8*5]); - MAC(b3, + W3, col[8*5]); - } - - if(col[8*6]){ - a0 += + W6*col[8*6]; - a1 += - W2*col[8*6]; - a2 += + W2*col[8*6]; - a3 += - W6*col[8*6]; - } - - if (col[8*7]) { - MAC(b0, + W7, col[8*7]); - MAC(b1, - W5, col[8*7]); - MAC(b2, + W3, col[8*7]); - MAC(b3, - W1, col[8*7]); - } + IDCT_COLS; dest[0] = CLIP((a0 + b0) >> COL_SHIFT); dest += line_size; @@ -239,54 +242,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, int a0, a1, a2, a3, b0, b1, b2, b3; INIT_CLIP; - /* XXX: I did that only to give same values as previous code */ - a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); - a1 = a0; - a2 = a0; - a3 = a0; - - a0 += + W2*col[8*2]; - a1 += + W6*col[8*2]; - a2 += - W6*col[8*2]; - a3 += - W2*col[8*2]; - - b0 = MUL(W1, col[8*1]); - b1 = MUL(W3, col[8*1]); - b2 = MUL(W5, col[8*1]); - b3 = MUL(W7, col[8*1]); - - MAC(b0, + W3, col[8*3]); - MAC(b1, - W7, col[8*3]); - MAC(b2, - W1, col[8*3]); - MAC(b3, - W5, col[8*3]); - - if(col[8*4]){ - a0 += + W4*col[8*4]; - a1 += - W4*col[8*4]; - a2 += - W4*col[8*4]; - a3 += + W4*col[8*4]; - } - - if (col[8*5]) { - MAC(b0, + W5, col[8*5]); - MAC(b1, - W1, col[8*5]); - MAC(b2, + W7, col[8*5]); - MAC(b3, + W3, col[8*5]); - } - - if(col[8*6]){ - a0 += + W6*col[8*6]; - a1 += - W2*col[8*6]; - a2 += + W2*col[8*6]; - a3 += - W6*col[8*6]; - } - - if (col[8*7]) { - MAC(b0, + W7, col[8*7]); - MAC(b1, - W5, col[8*7]); - MAC(b2, + W3, col[8*7]); - MAC(b3, - W1, col[8*7]); - } + IDCT_COLS; dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); dest += line_size; @@ -309,54 +265,7 @@ static 
inline void FUNC(idctSparseCol)(DCTELEM *col) { int a0, a1, a2, a3, b0, b1, b2, b3; - /* XXX: I did that only to give same values as previous code */ - a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); - a1 = a0; - a2 = a0; - a3 = a0; - - a0 += + W2*col[8*2]; - a1 += + W6*col[8*2]; - a2 += - W6*col[8*2]; - a3 += - W2*col[8*2]; - - b0 = MUL(W1, col[8*1]); - b1 = MUL(W3, col[8*1]); - b2 = MUL(W5, col[8*1]); - b3 = MUL(W7, col[8*1]); - - MAC(b0, + W3, col[8*3]); - MAC(b1, - W7, col[8*3]); - MAC(b2, - W1, col[8*3]); - MAC(b3, - W5, col[8*3]); - - if(col[8*4]){ - a0 += + W4*col[8*4]; - a1 += - W4*col[8*4]; - a2 += - W4*col[8*4]; - a3 += + W4*col[8*4]; - } - - if (col[8*5]) { - MAC(b0, + W5, col[8*5]); - MAC(b1, - W1, col[8*5]); - MAC(b2, + W7, col[8*5]); - MAC(b3, + W3, col[8*5]); - } - - if(col[8*6]){ - a0 += + W6*col[8*6]; - a1 += - W2*col[8*6]; - a2 += + W2*col[8*6]; - a3 += - W6*col[8*6]; - } - - if (col[8*7]) { - MAC(b0, + W7, col[8*7]); - MAC(b1, - W5, col[8*7]); - MAC(b2, + W3, col[8*7]); - MAC(b3, - W1, col[8*7]); - } + IDCT_COLS; col[0 ] = ((a0 + b0) >> COL_SHIFT); col[8 ] = ((a1 + b1) >> COL_SHIFT); From 2cc4f3b21f33740db1728bae104f336e7008731c Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 21 Jul 2011 09:49:17 +0100 Subject: [PATCH 16/18] simple_idct: whitespace cosmetics Signed-off-by: Mans Rullgard --- libavcodec/simple_idct_template.c | 248 +++++++++++++++--------------- 1 file changed, 124 insertions(+), 124 deletions(-) diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index 81845c962d..6d3f6f764d 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -87,78 +87,77 @@ static inline void FUNC(idctRowCondDC)(DCTELEM *row) { - int a0, a1, a2, a3, b0, b1, b2, b3; + int a0, a1, a2, a3, b0, b1, b2, b3; #if HAVE_FAST_64BIT #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) - if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { - uint64_t temp = (row[0] << DC_SHIFT) & 
0xffff; - temp += temp << 16; - temp += temp << 32; - ((uint64_t *)row)[0] = temp; - ((uint64_t *)row)[1] = temp; - return; - } + if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { + uint64_t temp = (row[0] << DC_SHIFT) & 0xffff; + temp += temp << 16; + temp += temp << 32; + ((uint64_t *)row)[0] = temp; + ((uint64_t *)row)[1] = temp; + return; + } #else - if (!(((uint32_t*)row)[1] | - ((uint32_t*)row)[2] | - ((uint32_t*)row)[3] | - row[1])) { - uint32_t temp = (row[0] << DC_SHIFT) & 0xffff; - temp += temp << 16; - ((uint32_t*)row)[0]=((uint32_t*)row)[1] = - ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; - return; - } + if (!(((uint32_t*)row)[1] | + ((uint32_t*)row)[2] | + ((uint32_t*)row)[3] | + row[1])) { + uint32_t temp = (row[0] << DC_SHIFT) & 0xffff; + temp += temp << 16; + ((uint32_t*)row)[0]=((uint32_t*)row)[1] = + ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; + return; + } #endif - a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); - a1 = a0; - a2 = a0; - a3 = a0; - - /* no need to optimize : gcc does it */ - a0 += W2 * row[2]; - a1 += W6 * row[2]; - a2 -= W6 * row[2]; - a3 -= W2 * row[2]; - - b0 = MUL(W1, row[1]); - MAC(b0, W3, row[3]); - b1 = MUL(W3, row[1]); - MAC(b1, -W7, row[3]); - b2 = MUL(W5, row[1]); - MAC(b2, -W1, row[3]); - b3 = MUL(W7, row[1]); - MAC(b3, -W5, row[3]); - - if (AV_RN64A(row + 4)) { - a0 += W4*row[4] + W6*row[6]; - a1 += - W4*row[4] - W2*row[6]; - a2 += - W4*row[4] + W2*row[6]; - a3 += W4*row[4] - W6*row[6]; - - MAC(b0, W5, row[5]); - MAC(b0, W7, row[7]); - - MAC(b1, -W1, row[5]); - MAC(b1, -W5, row[7]); - - MAC(b2, W7, row[5]); - MAC(b2, W3, row[7]); - - MAC(b3, W3, row[5]); - MAC(b3, -W1, row[7]); - } - - row[0] = (a0 + b0) >> ROW_SHIFT; - row[7] = (a0 - b0) >> ROW_SHIFT; - row[1] = (a1 + b1) >> ROW_SHIFT; - row[6] = (a1 - b1) >> ROW_SHIFT; - row[2] = (a2 + b2) >> ROW_SHIFT; - row[5] = (a2 - b2) >> ROW_SHIFT; - row[3] = (a3 + b3) >> ROW_SHIFT; - row[4] = (a3 - b3) >> ROW_SHIFT; + a0 = (W4 * row[0]) + (1 << 
(ROW_SHIFT - 1)); + a1 = a0; + a2 = a0; + a3 = a0; + + a0 += W2 * row[2]; + a1 += W6 * row[2]; + a2 -= W6 * row[2]; + a3 -= W2 * row[2]; + + b0 = MUL(W1, row[1]); + MAC(b0, W3, row[3]); + b1 = MUL(W3, row[1]); + MAC(b1, -W7, row[3]); + b2 = MUL(W5, row[1]); + MAC(b2, -W1, row[3]); + b3 = MUL(W7, row[1]); + MAC(b3, -W5, row[3]); + + if (AV_RN64A(row + 4)) { + a0 += W4*row[4] + W6*row[6]; + a1 += - W4*row[4] - W2*row[6]; + a2 += - W4*row[4] + W2*row[6]; + a3 += W4*row[4] - W6*row[6]; + + MAC(b0, W5, row[5]); + MAC(b0, W7, row[7]); + + MAC(b1, -W1, row[5]); + MAC(b1, -W5, row[7]); + + MAC(b2, W7, row[5]); + MAC(b2, W3, row[7]); + + MAC(b3, W3, row[5]); + MAC(b3, -W1, row[7]); + } + + row[0] = (a0 + b0) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; } #define IDCT_COLS do { \ @@ -214,67 +213,67 @@ static inline void FUNC(idctRowCondDC)(DCTELEM *row) static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, DCTELEM *col) { - int a0, a1, a2, a3, b0, b1, b2, b3; - INIT_CLIP; - - IDCT_COLS; - - dest[0] = CLIP((a0 + b0) >> COL_SHIFT); - dest += line_size; - dest[0] = CLIP((a1 + b1) >> COL_SHIFT); - dest += line_size; - dest[0] = CLIP((a2 + b2) >> COL_SHIFT); - dest += line_size; - dest[0] = CLIP((a3 + b3) >> COL_SHIFT); - dest += line_size; - dest[0] = CLIP((a3 - b3) >> COL_SHIFT); - dest += line_size; - dest[0] = CLIP((a2 - b2) >> COL_SHIFT); - dest += line_size; - dest[0] = CLIP((a1 - b1) >> COL_SHIFT); - dest += line_size; - dest[0] = CLIP((a0 - b0) >> COL_SHIFT); + int a0, a1, a2, a3, b0, b1, b2, b3; + INIT_CLIP; + + IDCT_COLS; + + dest[0] = CLIP((a0 + b0) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a1 + b1) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a2 + b2) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a3 + b3) >> 
COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a3 - b3) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a2 - b2) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a1 - b1) >> COL_SHIFT); + dest += line_size; + dest[0] = CLIP((a0 - b0) >> COL_SHIFT); } static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, DCTELEM *col) { - int a0, a1, a2, a3, b0, b1, b2, b3; - INIT_CLIP; - - IDCT_COLS; - - dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); - dest += line_size; - dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); - dest += line_size; - dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); - dest += line_size; - dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); - dest += line_size; - dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); - dest += line_size; - dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); - dest += line_size; - dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); - dest += line_size; - dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); + int a0, a1, a2, a3, b0, b1, b2, b3; + INIT_CLIP; + + IDCT_COLS; + + dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); + dest += line_size; + dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); } static inline void FUNC(idctSparseCol)(DCTELEM *col) { - int a0, a1, a2, a3, b0, b1, b2, b3; - - IDCT_COLS; - - col[0 ] = ((a0 + b0) >> COL_SHIFT); - col[8 ] = ((a1 + b1) >> COL_SHIFT); - col[16] = ((a2 + b2) >> COL_SHIFT); - col[24] = ((a3 + b3) >> COL_SHIFT); - col[32] = ((a3 - b3) >> COL_SHIFT); - col[40] = ((a2 - b2) >> COL_SHIFT); - col[48] = 
((a1 - b1) >> COL_SHIFT); - col[56] = ((a0 - b0) >> COL_SHIFT); + int a0, a1, a2, a3, b0, b1, b2, b3; + + IDCT_COLS; + + col[0 ] = ((a0 + b0) >> COL_SHIFT); + col[8 ] = ((a1 + b1) >> COL_SHIFT); + col[16] = ((a2 + b2) >> COL_SHIFT); + col[24] = ((a3 + b3) >> COL_SHIFT); + col[32] = ((a3 - b3) >> COL_SHIFT); + col[40] = ((a2 - b2) >> COL_SHIFT); + col[48] = ((a1 - b1) >> COL_SHIFT); + col[56] = ((a0 - b0) >> COL_SHIFT); } void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block) @@ -284,10 +283,10 @@ void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block) line_size /= sizeof(pixel); - for(i=0; i<8; i++) + for (i = 0; i < 8; i++) FUNC(idctRowCondDC)(block + i*8); - for(i=0; i<8; i++) + for (i = 0; i < 8; i++) FUNC(idctSparseColPut)(dest + i, line_size, block + i); } @@ -298,19 +297,20 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block) line_size /= sizeof(pixel); - for(i=0; i<8; i++) + for (i = 0; i < 8; i++) FUNC(idctRowCondDC)(block + i*8); - for(i=0; i<8; i++) + for (i = 0; i < 8; i++) FUNC(idctSparseColAdd)(dest + i, line_size, block + i); } void FUNC(ff_simple_idct)(DCTELEM *block) { int i; - for(i=0; i<8; i++) + + for (i = 0; i < 8; i++) FUNC(idctRowCondDC)(block + i*8); - for(i=0; i<8; i++) + for (i = 0; i < 8; i++) FUNC(idctSparseCol)(block + i); } From 371584c42bffa21991a0e0d97a03f6d42123ba1f Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Wed, 13 Jul 2011 18:17:54 +0100 Subject: [PATCH 17/18] build: add -L flags before existing LDFLAGS This ensures the linker picks the just built libraries even if LDFLAGS for some reason contains -L flags pointing at other directories containing libav libraries. 
Signed-off-by: Mans Rullgard --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a27fe28934..9d6ec3a46d 100644 --- a/Makefile +++ b/Makefile @@ -20,13 +20,15 @@ $(foreach VAR,$(SILENT),$(eval override $(VAR) = @$($(VAR)))) $(eval INSTALL = @$(call ECHO,INSTALL,$$(^:$(SRC_PATH)/%=%)); $(INSTALL)) endif +ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale + IFLAGS := -I. -I$(SRC_PATH) CPPFLAGS := $(IFLAGS) $(CPPFLAGS) CFLAGS += $(ECFLAGS) CCFLAGS = $(CFLAGS) YASMFLAGS += $(IFLAGS) -Pconfig.asm HOSTCFLAGS += $(IFLAGS) -LDFLAGS += $(ALLFFLIBS:%=-Llib%) +LDFLAGS := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS) define COMPILE $($(1)DEP) @@ -66,8 +68,6 @@ BASENAMES = ffmpeg ffplay ffprobe ffserver ALLPROGS = $(BASENAMES:%=%$(EXESUF)) ALLMANPAGES = $(BASENAMES:%=%.1) -ALLFFLIBS = avcodec avdevice avfilter avformat avutil postproc swscale - FFLIBS-$(CONFIG_AVDEVICE) += avdevice FFLIBS-$(CONFIG_AVFILTER) += avfilter FFLIBS-$(CONFIG_AVFORMAT) += avformat From e5985185d2eda942333ebbb72bd7d043ffe40be7 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 21 Jul 2011 14:25:01 +0200 Subject: [PATCH 18/18] rv30: return AVERROR(EINVAL) instead of EINVAL On some platforms EINVAL could be positive, ensure we return negative values. --- libavcodec/rv30.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c index e047c82fe0..17ea801802 100644 --- a/libavcodec/rv30.c +++ b/libavcodec/rv30.c @@ -256,7 +256,7 @@ static av_cold int rv30_decode_init(AVCodecContext *avctx) if(avctx->extradata_size - 8 < (r->rpr - 1) * 2){ av_log(avctx, AV_LOG_ERROR, "Insufficient extradata - need at least %d bytes, got %d\n", 6 + r->rpr * 2, avctx->extradata_size); - return EINVAL; + return AVERROR(EINVAL); } r->parse_slice_header = rv30_parse_slice_header; r->decode_intra_types = rv30_decode_intra_types;