|
|
|
@ -33,7 +33,6 @@ |
|
|
|
|
#include "avcodec.h" |
|
|
|
|
#include "h264.h" |
|
|
|
|
#include "qpeldsp.h" |
|
|
|
|
#include "svq3.h" |
|
|
|
|
#include "thread.h" |
|
|
|
|
|
|
|
|
|
static inline int get_lowest_part_list_y(H264SliceContext *sl, |
|
|
|
@ -611,8 +610,7 @@ static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, |
|
|
|
|
|
|
|
|
|
static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, |
|
|
|
|
H264SliceContext *sl, |
|
|
|
|
int mb_type, int is_h264, |
|
|
|
|
int simple, |
|
|
|
|
int mb_type, int simple, |
|
|
|
|
int transform_bypass, |
|
|
|
|
int pixel_shift, |
|
|
|
|
const int *block_offset, |
|
|
|
@ -687,48 +685,40 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, |
|
|
|
|
h->hpc.pred4x4[dir](ptr, topright, linesize); |
|
|
|
|
nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; |
|
|
|
|
if (nnz) { |
|
|
|
|
if (is_h264) { |
|
|
|
|
if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) |
|
|
|
|
idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
|
|
|
|
else |
|
|
|
|
idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
|
|
|
|
} else if (CONFIG_SVQ3_DECODER) |
|
|
|
|
ff_svq3_add_idct_c(ptr, sl->mb + i * 16 + p * 256, linesize, qscale, 0); |
|
|
|
|
if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) |
|
|
|
|
idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
|
|
|
|
else |
|
|
|
|
idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize); |
|
|
|
|
if (is_h264) { |
|
|
|
|
if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) { |
|
|
|
|
if (!transform_bypass) |
|
|
|
|
h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
sl->mb_luma_dc[p], |
|
|
|
|
h->dequant4_coeff[p][qscale][0]); |
|
|
|
|
else { |
|
|
|
|
static const uint8_t dc_mapping[16] = { |
|
|
|
|
0 * 16, 1 * 16, 4 * 16, 5 * 16, |
|
|
|
|
2 * 16, 3 * 16, 6 * 16, 7 * 16, |
|
|
|
|
8 * 16, 9 * 16, 12 * 16, 13 * 16, |
|
|
|
|
10 * 16, 11 * 16, 14 * 16, 15 * 16 |
|
|
|
|
}; |
|
|
|
|
for (i = 0; i < 16; i++) |
|
|
|
|
dctcoef_set(sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
pixel_shift, dc_mapping[i], |
|
|
|
|
dctcoef_get(sl->mb_luma_dc[p], |
|
|
|
|
pixel_shift, i)); |
|
|
|
|
} |
|
|
|
|
if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) { |
|
|
|
|
if (!transform_bypass) |
|
|
|
|
h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
sl->mb_luma_dc[p], |
|
|
|
|
h->dequant4_coeff[p][qscale][0]); |
|
|
|
|
else { |
|
|
|
|
static const uint8_t dc_mapping[16] = { |
|
|
|
|
0 * 16, 1 * 16, 4 * 16, 5 * 16, |
|
|
|
|
2 * 16, 3 * 16, 6 * 16, 7 * 16, |
|
|
|
|
8 * 16, 9 * 16, 12 * 16, 13 * 16, |
|
|
|
|
10 * 16, 11 * 16, 14 * 16, 15 * 16 |
|
|
|
|
}; |
|
|
|
|
for (i = 0; i < 16; i++) |
|
|
|
|
dctcoef_set(sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
pixel_shift, dc_mapping[i], |
|
|
|
|
dctcoef_get(sl->mb_luma_dc[p], |
|
|
|
|
pixel_shift, i)); |
|
|
|
|
} |
|
|
|
|
} else if (CONFIG_SVQ3_DECODER) |
|
|
|
|
ff_svq3_luma_dc_dequant_idct_c(sl->mb + p * 256, |
|
|
|
|
sl->mb_luma_dc[p], qscale); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl, |
|
|
|
|
int mb_type, |
|
|
|
|
int is_h264, int simple, |
|
|
|
|
int mb_type, int simple, |
|
|
|
|
int transform_bypass, |
|
|
|
|
int pixel_shift, |
|
|
|
|
const int *block_offset, |
|
|
|
@ -739,60 +729,50 @@ static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264Sl |
|
|
|
|
int i; |
|
|
|
|
block_offset += 16 * p; |
|
|
|
|
if (!IS_INTRA4x4(mb_type)) { |
|
|
|
|
if (is_h264) { |
|
|
|
|
if (IS_INTRA16x16(mb_type)) { |
|
|
|
|
if (transform_bypass) { |
|
|
|
|
if (h->sps.profile_idc == 244 && |
|
|
|
|
(sl->intra16x16_pred_mode == VERT_PRED8x8 || |
|
|
|
|
sl->intra16x16_pred_mode == HOR_PRED8x8)) { |
|
|
|
|
h->hpc.pred16x16_add[sl->intra16x16_pred_mode](dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize); |
|
|
|
|
} else { |
|
|
|
|
for (i = 0; i < 16; i++) |
|
|
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]] || |
|
|
|
|
dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) |
|
|
|
|
h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i], |
|
|
|
|
sl->mb + (i * 16 + p * 256 << pixel_shift), |
|
|
|
|
linesize); |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
h->h264dsp.h264_idct_add16intra(dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize, |
|
|
|
|
sl->non_zero_count_cache + p * 5 * 8); |
|
|
|
|
} |
|
|
|
|
} else if (sl->cbp & 15) { |
|
|
|
|
if (transform_bypass) { |
|
|
|
|
const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
|
|
|
|
idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear |
|
|
|
|
: h->h264dsp.h264_add_pixels4_clear; |
|
|
|
|
for (i = 0; i < 16; i += di) |
|
|
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]]) |
|
|
|
|
idct_add(dest_y + block_offset[i], |
|
|
|
|
sl->mb + (i * 16 + p * 256 << pixel_shift), |
|
|
|
|
linesize); |
|
|
|
|
if (IS_INTRA16x16(mb_type)) { |
|
|
|
|
if (transform_bypass) { |
|
|
|
|
if (h->sps.profile_idc == 244 && |
|
|
|
|
(sl->intra16x16_pred_mode == VERT_PRED8x8 || |
|
|
|
|
sl->intra16x16_pred_mode == HOR_PRED8x8)) { |
|
|
|
|
h->hpc.pred16x16_add[sl->intra16x16_pred_mode](dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize); |
|
|
|
|
} else { |
|
|
|
|
if (IS_8x8DCT(mb_type)) |
|
|
|
|
h->h264dsp.h264_idct8_add4(dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize, |
|
|
|
|
sl->non_zero_count_cache + p * 5 * 8); |
|
|
|
|
else |
|
|
|
|
h->h264dsp.h264_idct_add16(dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize, |
|
|
|
|
sl->non_zero_count_cache + p * 5 * 8); |
|
|
|
|
for (i = 0; i < 16; i++) |
|
|
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]] || |
|
|
|
|
dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) |
|
|
|
|
h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i], |
|
|
|
|
sl->mb + (i * 16 + p * 256 << pixel_shift), |
|
|
|
|
linesize); |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
h->h264dsp.h264_idct_add16intra(dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize, |
|
|
|
|
sl->non_zero_count_cache + p * 5 * 8); |
|
|
|
|
} |
|
|
|
|
} else if (sl->cbp & 15) { |
|
|
|
|
if (transform_bypass) { |
|
|
|
|
const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
|
|
|
|
idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear |
|
|
|
|
: h->h264dsp.h264_add_pixels4_clear; |
|
|
|
|
for (i = 0; i < 16; i += di) |
|
|
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]]) |
|
|
|
|
idct_add(dest_y + block_offset[i], |
|
|
|
|
sl->mb + (i * 16 + p * 256 << pixel_shift), |
|
|
|
|
linesize); |
|
|
|
|
} else { |
|
|
|
|
if (IS_8x8DCT(mb_type)) |
|
|
|
|
h->h264dsp.h264_idct8_add4(dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize, |
|
|
|
|
sl->non_zero_count_cache + p * 5 * 8); |
|
|
|
|
else |
|
|
|
|
h->h264dsp.h264_idct_add16(dest_y, block_offset, |
|
|
|
|
sl->mb + (p * 256 << pixel_shift), |
|
|
|
|
linesize, |
|
|
|
|
sl->non_zero_count_cache + p * 5 * 8); |
|
|
|
|
} |
|
|
|
|
} else if (CONFIG_SVQ3_DECODER) { |
|
|
|
|
for (i = 0; i < 16; i++) |
|
|
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]] || sl->mb[i * 16 + p * 256]) { |
|
|
|
|
// FIXME benchmark weird rule, & below
|
|
|
|
|
uint8_t *const ptr = dest_y + block_offset[i]; |
|
|
|
|
ff_svq3_add_idct_c(ptr, sl->mb + i * 16 + p * 256, linesize, |
|
|
|
|
sl->qscale, IS_INTRA(mb_type) ? 1 : 0); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|