split intra / inter dequantization

Originally committed as revision 2607 to svn://svn.ffmpeg.org/ffmpeg/trunk
pull/126/head
Michael Niedermayer 21 years ago
parent 7ebfc0ea63
commit d50635cd24
  1. 7
      libavcodec/dsputil.c
  2. 120
      libavcodec/i386/mpegvideo_mmx.c
  3. 142
      libavcodec/mpegvideo.c
  4. 16
      libavcodec/mpegvideo.h

@ -2766,7 +2766,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s
memcpy(bak, temp, 64*sizeof(DCTELEM));
s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
s->dct_unquantize(s, temp, 0, s->qscale);
s->dct_unquantize_inter(s, temp, 0, s->qscale);
simple_idct(temp); //FIXME
for(i=0; i<64; i++)
@ -2839,7 +2839,10 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
}
if(last>=0){
s->dct_unquantize(s, temp, 0, s->qscale);
if(s->mb_intra)
s->dct_unquantize_intra(s, temp, 0, s->qscale);
else
s->dct_unquantize_inter(s, temp, 0, s->qscale);
}
s->dsp.idct_add(bak, stride, temp);

@ -31,31 +31,29 @@ static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xfff
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
static void dct_unquantize_h263_mmx(MpegEncContext *s,
static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int level, qmul, qadd, nCoeffs;
qmul = qscale << 1;
qadd = (qscale - 1) | 1;
assert(s->block_last_index[n]>=0 || s->h263_aic);
if (s->mb_intra) {
if (!s->h263_aic) {
if (n < 4)
level = block[0] * s->y_dc_scale;
else
level = block[0] * s->c_dc_scale;
qadd = (qscale - 1) | 1;
}else{
qadd = 0;
level= block[0];
}
if(s->ac_pred)
nCoeffs=63;
} else {
else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
level = 0;/* keep gcc quiet */
}
//printf("%d %d ", qmul, qadd);
asm volatile(
"movd %1, %%mm6 \n\t" //qmul
@ -104,11 +102,72 @@ asm volatile(
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory"
);
if(s->mb_intra)
block[0]= level;
}
static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int level, qmul, qadd, nCoeffs;
qmul = qscale << 1;
qadd = (qscale - 1) | 1;
assert(s->block_last_index[n]>=0 || s->h263_aic);
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
asm volatile(
"movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"movd %2, %%mm5 \n\t" //qadd
"pxor %%mm7, %%mm7 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
".balign 16\n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
"pmullw %%mm6, %%mm0 \n\t"
"pmullw %%mm6, %%mm1 \n\t"
"movq (%0, %3), %%mm2 \n\t"
"movq 8(%0, %3), %%mm3 \n\t"
"pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
"pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
"pxor %%mm2, %%mm0 \n\t"
"pxor %%mm3, %%mm1 \n\t"
"paddw %%mm7, %%mm0 \n\t"
"paddw %%mm7, %%mm1 \n\t"
"pxor %%mm0, %%mm2 \n\t"
"pxor %%mm1, %%mm3 \n\t"
"pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0
"pandn %%mm2, %%mm0 \n\t"
"pandn %%mm3, %%mm1 \n\t"
"movq %%mm0, (%0, %3) \n\t"
"movq %%mm1, 8(%0, %3) \n\t"
"addl $16, %3 \n\t"
"jng 1b \n\t"
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory"
);
}
/*
NK:
Note: looking at PARANOID:
@ -138,18 +197,17 @@ asm volatile(
high3:low3 = low1*low2
high3 += tlow1
*/
static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int nCoeffs;
const uint16_t *quant_matrix;
int block0;
assert(s->block_last_index[n]>=0);
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
if (s->mb_intra) {
int block0;
if (n < 4)
block0 = block[0] * s->y_dc_scale;
else
@ -206,8 +264,18 @@ asm volatile(
: "%eax", "memory"
);
block[0]= block0;
}
static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int nCoeffs;
const uint16_t *quant_matrix;
assert(s->block_last_index[n]>=0);
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
} else {
quant_matrix = s->inter_matrix;
asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
@ -264,21 +332,18 @@ asm volatile(
);
}
}
static void dct_unquantize_mpeg2_mmx(MpegEncContext *s,
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int nCoeffs;
const uint16_t *quant_matrix;
int block0;
assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
if (s->mb_intra) {
int block0;
if (n < 4)
block0 = block[0] * s->y_dc_scale;
else
@ -331,8 +396,19 @@ asm volatile(
);
block[0]= block0;
//Note, we dont do mismatch control for intra as errors cannot accumulate
}
static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int nCoeffs;
const uint16_t *quant_matrix;
assert(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
} else {
quant_matrix = s->inter_matrix;
asm volatile(
"pcmpeqw %%mm7, %%mm7 \n\t"
@ -398,7 +474,6 @@ asm volatile(
: "%eax", "memory"
);
}
}
/* draw the edges of width 'w' of an image of size width, height
this mmx version can only handle w==8 || w==16 */
@ -505,9 +580,12 @@ void MPV_common_init_mmx(MpegEncContext *s)
if (mm_flags & MM_MMX) {
const int dct_algo = s->avctx->dct_algo;
s->dct_unquantize_h263 = dct_unquantize_h263_mmx;
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx;
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx;
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
draw_edges = draw_edges_mmx;

@ -40,11 +40,17 @@
#ifdef CONFIG_ENCODERS
static void encode_picture(MpegEncContext *s, int picture_number);
#endif //CONFIG_ENCODERS
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_c(MpegEncContext *s,
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_c(MpegEncContext *s,
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
#ifdef CONFIG_ENCODERS
@ -204,9 +210,12 @@ void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
s->dct_unquantize_h263 = dct_unquantize_h263_c;
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
#ifdef CONFIG_ENCODERS
s->dct_quantize= dct_quantize_c;
@ -1186,12 +1195,16 @@ alloc:
/* set dequantizer, we cant do it during init as it might change for mpeg4
and we cant do it in the header decode as init isnt called for mpeg4 there yet */
if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO)
s->dct_unquantize = s->dct_unquantize_mpeg2;
else if(s->out_format == FMT_H263)
s->dct_unquantize = s->dct_unquantize_h263;
else
s->dct_unquantize = s->dct_unquantize_mpeg1;
if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
}else if(s->out_format == FMT_H263){
s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
}else{
s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
}
if(s->dct_error_sum){
assert(s->avctx->noise_reduction && s->encoding);
@ -2746,7 +2759,7 @@ static inline void MPV_motion(MpegEncContext *s,
static inline void put_dct(MpegEncContext *s,
DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
s->dct_unquantize(s, block, i, qscale);
s->dct_unquantize_intra(s, block, i, qscale);
s->dsp.idct_put (dest, line_size, block);
}
@ -2763,7 +2776,7 @@ static inline void add_dequant_dct(MpegEncContext *s,
DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
if (s->block_last_index[i] >= 0) {
s->dct_unquantize(s, block, i, qscale);
s->dct_unquantize_inter(s, block, i, qscale);
s->dsp.idct_add (dest, line_size, block);
}
@ -4781,7 +4794,7 @@ static int dct_quantize_c(MpegEncContext *s,
#endif //CONFIG_ENCODERS
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int i, level, nCoeffs;
@ -4789,7 +4802,6 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
nCoeffs= s->block_last_index[n];
if (s->mb_intra) {
if (n < 4)
block[0] = block[0] * s->y_dc_scale;
else
@ -4809,17 +4821,21 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
level = (int)(level * qscale * quant_matrix[j]) >> 3;
level = (level - 1) | 1;
}
#ifdef PARANOID
if (level < -2048 || level > 2047)
fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
block[j] = level;
}
}
} else {
i = 0;
}
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int i, level, nCoeffs;
const uint16_t *quant_matrix;
nCoeffs= s->block_last_index[n];
quant_matrix = s->inter_matrix;
for(;i<=nCoeffs;i++) {
for(i=0; i<=nCoeffs; i++) {
int j= s->intra_scantable.permutated[i];
level = block[j];
if (level) {
@ -4834,17 +4850,12 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
((int) (quant_matrix[j]))) >> 4;
level = (level - 1) | 1;
}
#ifdef PARANOID
if (level < -2048 || level > 2047)
fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
block[j] = level;
}
}
}
}
static void dct_unquantize_mpeg2_c(MpegEncContext *s,
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int i, level, nCoeffs;
@ -4853,7 +4864,6 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
if(s->alternate_scan) nCoeffs= 63;
else nCoeffs= s->block_last_index[n];
if (s->mb_intra) {
if (n < 4)
block[0] = block[0] * s->y_dc_scale;
else
@ -4870,18 +4880,23 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
} else {
level = (int)(level * qscale * quant_matrix[j]) >> 3;
}
#ifdef PARANOID
if (level < -2048 || level > 2047)
fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
block[j] = level;
}
}
} else {
}
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int i, level, nCoeffs;
const uint16_t *quant_matrix;
int sum=-1;
i = 0;
if(s->alternate_scan) nCoeffs= 63;
else nCoeffs= s->block_last_index[n];
quant_matrix = s->inter_matrix;
for(;i<=nCoeffs;i++) {
for(i=0; i<=nCoeffs; i++) {
int j= s->intra_scantable.permutated[i];
level = block[j];
if (level) {
@ -4894,20 +4909,14 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
level = (((level << 1) + 1) * qscale *
((int) (quant_matrix[j]))) >> 4;
}
#ifdef PARANOID
if (level < -2048 || level > 2047)
fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
block[j] = level;
sum+=level;
}
}
block[63]^=sum&1;
}
}
static void dct_unquantize_h263_c(MpegEncContext *s,
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int i, level, qmul, qadd;
@ -4915,25 +4924,23 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
assert(s->block_last_index[n]>=0);
qadd = (qscale - 1) | 1;
qmul = qscale << 1;
if (s->mb_intra) {
if (!s->h263_aic) {
if (n < 4)
block[0] = block[0] * s->y_dc_scale;
else
block[0] = block[0] * s->c_dc_scale;
}else
qadd = 0;
i = 1;
nCoeffs= 63; //does not allways use zigzag table
qadd = (qscale - 1) | 1;
}else{
i = 0;
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
qadd = 0;
}
if(s->ac_pred)
nCoeffs=63;
else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
for(;i<=nCoeffs;i++) {
for(i=1; i<=nCoeffs; i++) {
level = block[i];
if (level) {
if (level < 0) {
@ -4941,15 +4948,36 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
} else {
level = level * qmul + qadd;
}
#ifdef PARANOID
if (level < -2048 || level > 2047)
fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
block[i] = level;
}
}
}
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int i, level, qmul, qadd;
int nCoeffs;
assert(s->block_last_index[n]>=0);
qadd = (qscale - 1) | 1;
qmul = qscale << 1;
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
for(i=0; i<=nCoeffs; i++) {
level = block[i];
if (level) {
if (level < 0) {
level = level * qmul - qadd;
} else {
level = level * qmul + qadd;
}
block[i] = level;
}
}
}
static const AVOption mpeg4_options[] =
{

@ -645,13 +645,21 @@ typedef struct MpegEncContext {
#define SLICE_END -2 ///<end marker found
#define SLICE_NOEND -3 ///<no end marker or error found but mb count exceeded
void (*dct_unquantize_mpeg1)(struct MpegEncContext *s,
void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_mpeg2)(struct MpegEncContext *s,
void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_h263)(struct MpegEncContext *s,
void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_h263_intra)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_h263_inter)(struct MpegEncContext *s,
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
DCTELEM *block/*align 16*/, int n, int qscale);
void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
DCTELEM *block/*align 16*/, int n, int qscale);
int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);

Loading…
Cancel
Save