Merge remote-tracking branch 'qatar/master'

* qatar/master: (35 commits)
  flvdec: Do not call parse_keyframes_index with a NULL stream
  libspeexdec: include system headers before local headers
  libspeexdec: return meaningful error codes
  libspeexdec: cosmetics: reindent
  libspeexdec: decode one frame at a time.
  swscale: fix signed shift overflows in ff_yuv2rgb_c_init_tables()
  Move timefilter code from lavf to lavd.
  mov: add support for hdvd and pgapmetadata atoms
  mov: rename function _stik, some indentation cosmetics
  mov: rename function _int8 to remove ambiguity, some indentation cosmetics
  mov: parse the gnre atom
  mp3on4: check for allocation failures in decode_init_mp3on4()
  mp3on4: create a separate flush function for MP3onMP4.
  mp3on4: ensure that the frame channel count does not exceed the codec channel count.
  mp3on4: set channel layout
  mp3on4: fix the output channel order
  mp3on4: allocate temp buffer with av_malloc() instead of on the stack.
  mp3on4: copy MPADSPContext from first context to all contexts.
  fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
  fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm
  ...

Conflicts:
	libavcodec/arm/h264dsp_init_arm.c
	libavcodec/h264.c
	libavcodec/h264.h
	libavcodec/h264_cabac.c
	libavcodec/h264_cavlc.c
	libavcodec/h264_ps.c
	libavcodec/h264dsp_template.c
	libavcodec/h264idct_template.c
	libavcodec/h264pred.c
	libavcodec/h264pred_template.c
	libavcodec/x86/h264dsp_mmx.c
	libavdevice/Makefile
	libavdevice/jack_audio.c
	libavformat/Makefile
	libavformat/flvdec.c
	libavformat/flvenc.c
	libavutil/pixfmt.h
	libswscale/utils.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/2/head
Michael Niedermayer 13 years ago
commit aedc908601
  1. 1
      Changelog
  2. 78
      libavcodec/arm/h264dsp_init_arm.c
  3. 86
      libavcodec/arm/h264dsp_neon.S
  4. 10
      libavcodec/fmtconvert.h
  5. 289
      libavcodec/h264.c
  6. 80
      libavcodec/h264_cabac.c
  7. 28
      libavcodec/h264dsp.c
  8. 10
      libavcodec/h264dsp.h
  9. 30
      libavcodec/h264dsp_template.c
  10. 27
      libavcodec/h264idct_template.c
  11. 13
      libavcodec/h264pred.c
  12. 38
      libavcodec/h264pred_template.c
  13. 60
      libavcodec/libspeexdec.c
  14. 92
      libavcodec/mpegaudiodec.c
  15. 2
      libavcodec/mpegaudiodec_float.c
  16. 44
      libavcodec/ppc/h264_altivec.c
  17. 2
      libavcodec/utils.c
  18. 68
      libavcodec/vp8.c
  19. 11
      libavcodec/vp8.h
  20. 8
      libavcodec/x86/dsputil_yasm.asm
  21. 140
      libavcodec/x86/fmtconvert.asm
  22. 213
      libavcodec/x86/fmtconvert_mmx.c
  23. 210
      libavcodec/x86/h264_weight.asm
  24. 145
      libavcodec/x86/h264_weight_10bit.asm
  25. 177
      libavcodec/x86/h264dsp_mmx.c
  26. 6
      libavdevice/Makefile
  27. 2
      libavdevice/alsa-audio.h
  28. 3
      libavdevice/jack_audio.c
  29. 2
      libavdevice/timefilter.c
  30. 6
      libavdevice/timefilter.h
  31. 5
      libavformat/Makefile
  32. 5
      libavformat/flvdec.c
  33. 34
      libavformat/flvenc.c
  34. 34
      libavformat/mov.c
  35. 23
      libavutil/pixdesc.c
  36. 4
      libavutil/pixfmt.h
  37. 12
      libavutil/x86/x86util.asm
  38. 4
      libswscale/swscale.c
  39. 4
      libswscale/swscale_internal.h
  40. 15
      libswscale/utils.c
  41. 4
      libswscale/yuv2rgb.c
  42. 2
      tests/ref/lavfi/pixdesc
  43. 2
      tests/ref/lavfi/pixfmts_copy
  44. 2
      tests/ref/lavfi/pixfmts_null
  45. 2
      tests/ref/lavfi/pixfmts_scale
  46. 2
      tests/ref/lavfi/pixfmts_vflip

@ -67,6 +67,7 @@ easier to use. The changes are:
- aevalsrc audio source added
- Ut Video decoder
- Speex encoding via libspeex
- 4:2:2 H.264 decoding support
version 0.8:

@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
@ -101,23 +76,14 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
}
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon;
c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon;
c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon;
c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
c->h264_idct_add = ff_h264_idct_add_neon;
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;

@ -1592,7 +1592,7 @@ endfunc
vdup.8 d1, r5
vmov q2, q8
vmov q3, q8
1: subs ip, ip, #2
1: subs r3, r3, #2
vld1.8 {d20-d21},[r0,:128], r2
\macd q2, d0, d20
pld [r0]
@ -1632,7 +1632,7 @@ endfunc
vdup.8 d1, r5
vmov q1, q8
vmov q10, q8
1: subs ip, ip, #2
1: subs r3, r3, #2
vld1.8 {d4},[r0,:64], r2
\macd q1, d0, d4
pld [r0]
@ -1662,7 +1662,7 @@ endfunc
vdup.8 d1, r5
vmov q1, q8
vmov q10, q8
1: subs ip, ip, #4
1: subs r3, r3, #4
vld1.32 {d4[0]},[r0,:32], r2
vld1.32 {d4[1]},[r0,:32], r2
\macd q1, d0, d4
@ -1700,16 +1700,17 @@ endfunc
.endm
.macro biweight_func w
function biweight_h264_pixels_\w\()_neon
function ff_biweight_h264_pixels_\w\()_neon, export=1
push {r4-r6, lr}
add r4, sp, #16
ldr r12, [sp, #16]
add r4, sp, #20
ldm r4, {r4-r6}
lsr lr, r4, #31
add r6, r6, #1
eors lr, lr, r5, lsr #30
orr r6, r6, #1
vdup.16 q9, r3
lsl r6, r6, r3
vdup.16 q9, r12
lsl r6, r6, r12
vmvn q9, q9
vdup.16 q8, r6
mov r6, r0
@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon
endfunc
.endm
.macro biweight_entry w, h, b=1
function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b biweight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
biweight_entry 16, 8
biweight_entry 16, 16, b=0
biweight_func 16
biweight_entry 8, 16
biweight_entry 8, 4
biweight_entry 8, 8, b=0
biweight_func 8
biweight_entry 4, 8
biweight_entry 4, 2
biweight_entry 4, 4, b=0
biweight_func 4
@ Weighted prediction
.macro weight_16 add
vdup.8 d0, r3
1: subs ip, ip, #2
vdup.8 d0, r12
1: subs r2, r2, #2
vld1.8 {d20-d21},[r0,:128], r1
vmull.u8 q2, d0, d20
pld [r0]
@ -1785,8 +1767,8 @@ endfunc
.endm
.macro weight_8 add
vdup.8 d0, r3
1: subs ip, ip, #2
vdup.8 d0, r12
1: subs r2, r2, #2
vld1.8 {d4},[r0,:64], r1
vmull.u8 q1, d0, d4
pld [r0]
@ -1806,10 +1788,10 @@ endfunc
.endm
.macro weight_4 add
vdup.8 d0, r3
vdup.8 d0, r12
vmov q1, q8
vmov q10, q8
1: subs ip, ip, #4
1: subs r2, r2, #4
vld1.32 {d4[0]},[r0,:32], r1
vld1.32 {d4[1]},[r0,:32], r1
vmull.u8 q1, d0, d4
@ -1842,50 +1824,32 @@ endfunc
.endm
.macro weight_func w
function weight_h264_pixels_\w\()_neon
function ff_weight_h264_pixels_\w\()_neon, export=1
push {r4, lr}
ldr r4, [sp, #8]
cmp r2, #1
lsl r4, r4, r2
ldr r12, [sp, #8]
ldr r4, [sp, #12]
cmp r3, #1
lsl r4, r4, r3
vdup.16 q8, r4
mov r4, r0
ble 20f
rsb lr, r2, #1
rsb lr, r3, #1
vdup.16 q9, lr
cmp r3, #0
cmp r12, #0
blt 10f
weight_\w vhadd.s16
10: rsb r3, r3, #0
10: rsb r12, r12, #0
weight_\w vhsub.s16
20: rsb lr, r2, #0
20: rsb lr, r3, #0
vdup.16 q9, lr
cmp r3, #0
cmp r12, #0
blt 10f
weight_\w vadd.s16
10: rsb r3, r3, #0
10: rsb r12, r12, #0
weight_\w vsub.s16
endfunc
.endm
.macro weight_entry w, h, b=1
function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b weight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
weight_entry 16, 8
weight_entry 16, 16, b=0
weight_func 16
weight_entry 8, 16
weight_entry 8, 4
weight_entry 8, 8, b=0
weight_func 8
weight_entry 4, 8
weight_entry 4, 2
weight_entry 4, 4, b=0
weight_func 4

@ -70,7 +70,15 @@ typedef struct FmtConvertContext {
long len, int channels);
/**
* Convert an array of interleaved float to multiple arrays of float.
* Convert multiple arrays of float to an array of interleaved float.
*
* @param dst destination array of interleaved float.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/
void (*float_interleave)(float *dst, const float **src, unsigned int len,
int channels);

@ -460,11 +460,14 @@ static void chroma_dc_dct_c(DCTELEM *block){
}
#endif
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
static av_always_inline void
mc_dir_part(H264Context *h, Picture *pic, int n, int square,
int height, int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int src_x_offset, int src_y_offset,
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
int pixel_shift, int chroma444){
int pixel_shift, int chroma_idc)
{
MpegEncContext * const s = &h->s;
const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
@ -479,6 +482,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
const int full_my= my>>2;
const int pic_width = 16*s->mb_width;
const int pic_height = 16*s->mb_height >> MB_FIELD;
int ysh;
if(mx&7) extra_width -= 3;
if(my&7) extra_height -= 3;
@ -487,7 +491,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
|| full_my < 0-extra_height
|| full_mx + 16/*FIXME*/ > pic_width + extra_width
|| full_my + 16/*FIXME*/ > pic_height + extra_height){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
emu=1;
}
@ -499,7 +504,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
if(chroma444){
if(chroma_idc == 3 /* yuv444 */){
src_cb = pic->f.data[1] + offset;
if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
@ -524,42 +529,55 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
return;
}
if(MB_FIELD){
ysh = 3 - (chroma_idc == 2 /* yuv422 */);
if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){
// chroma offset when predicting from a field of opposite parity
my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
}
src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize;
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize;
src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cb= s->edge_emu_buffer;
}
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cr= s->edge_emu_buffer;
}
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
}
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
static av_always_inline void
mc_part_std(H264Context *h, int n, int square, int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
int list0, int list1, int pixel_shift, int chroma444){
int list0, int list1, int pixel_shift, int chroma_idc)
{
MpegEncContext * const s = &h->s;
qpel_mc_func *qpix_op= qpix_put;
h264_chroma_mc_func chroma_op= chroma_put;
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
if(chroma444){
if (chroma_idc == 3 /* yuv444 */) {
dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
}else{
} else if (chroma_idc == 2 /* yuv422 */) {
dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
} else /* yuv420 */ {
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
}
@ -568,9 +586,9 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
if(list0){
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
mc_dir_part(h, ref, n, square, height, delta, 0,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma444);
qpix_op, chroma_op, pixel_shift, chroma_idc);
qpix_op= qpix_avg;
chroma_op= chroma_avg;
@ -578,28 +596,36 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
if(list1){
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
mc_dir_part(h, ref, n, square, height, delta, 1,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma444);
qpix_op, chroma_op, pixel_shift, chroma_idc);
}
}
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
static av_always_inline void
mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
int list0, int list1, int pixel_shift, int chroma444){
int list0, int list1, int pixel_shift, int chroma_idc){
MpegEncContext * const s = &h->s;
int chroma_height;
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
if(chroma444){
if (chroma_idc == 3 /* yuv444 */) {
chroma_height = height;
chroma_weight_avg = luma_weight_avg;
chroma_weight_op = luma_weight_op;
dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
}else{
} else if (chroma_idc == 2 /* yuv422 */) {
chroma_height = height;
dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
} else /* yuv420 */ {
chroma_height = height >> 1;
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
}
@ -615,27 +641,32 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
int refn0 = h->ref_cache[0][ scan8[n] ];
int refn1 = h->ref_cache[1][ scan8[n] ];
mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
x_offset, y_offset, qpix_put, chroma_put,
pixel_shift, chroma_idc);
mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
tmp_y, tmp_cb, tmp_cr,
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
x_offset, y_offset, qpix_put, chroma_put,
pixel_shift, chroma_idc);
if(h->use_weight == 2){
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
int weight1 = 64 - weight0;
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize,
height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
}else{
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
}
@ -643,42 +674,46 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
int list = list1 ? 1 : 0;
int refn = h->ref_cache[list][ scan8[n] ];
Picture *ref= &h->ref_list[list][refn];
mc_dir_part(h, ref, n, square, chroma_height, delta, list,
mc_dir_part(h, ref, n, square, height, delta, list,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put, chroma_put, pixel_shift, chroma444);
qpix_put, chroma_put, pixel_shift, chroma_idc);
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
if(h->use_weight_chroma){
chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
}
}
}
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
static av_always_inline void
mc_part(H264Context *h, int n, int square, int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int x_offset, int y_offset,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int list0, int list1, int pixel_shift, int chroma444){
int list0, int list1, int pixel_shift, int chroma_idc)
{
if((h->use_weight==2 && list0 && list1
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|| h->use_weight==1)
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put,
weight_op[0], weight_op[3], weight_avg[0],
weight_avg[3], list0, list1, pixel_shift, chroma444);
weight_op[0], weight_op[1], weight_avg[0],
weight_avg[1], list0, list1, pixel_shift, chroma_idc);
else
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
chroma_avg, list0, list1, pixel_shift, chroma444);
chroma_avg, list0, list1, pixel_shift, chroma_idc);
}
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
static av_always_inline void
prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc)
{
/* fetch pixels for estimated mv 4 macroblocks ahead
* optimized for 64byte cache lines */
MpegEncContext * const s = &h->s;
@ -689,7 +724,7 @@ static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, in
uint8_t **src = h->ref_list[list][refn].f.data;
int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
s->dsp.prefetch(src[0]+off, s->linesize, 4);
if(chroma444){
if (chroma_idc == 3 /* yuv444 */) {
s->dsp.prefetch(src[1]+off, s->linesize, 4);
s->dsp.prefetch(src[2]+off, s->linesize, 4);
}else{
@ -703,7 +738,8 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int pixel_shift, int chroma444){
int pixel_shift, int chroma_idc)
{
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
const int mb_type = s->current_picture.f.mb_type[mb_xy];
@ -712,36 +748,36 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
await_references(h);
prefetch_motion(h, 0, pixel_shift, chroma444);
prefetch_motion(h, 0, pixel_shift, chroma_idc);
if(IS_16X16(mb_type)){
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444);
pixel_shift, chroma_idc);
}else if(IS_16X8(mb_type)){
mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1],
weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444);
mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
pixel_shift, chroma_idc);
mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1],
weight_op, weight_avg,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma444);
pixel_shift, chroma_idc);
}else if(IS_8X16(mb_type)){
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444);
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
pixel_shift, chroma_idc);
mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma444);
pixel_shift, chroma_idc);
}else{
int i;
@ -754,50 +790,72 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
int y_offset= (i&2)<<1;
if(IS_SUB_8X8(sub_mb_type)){
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[3], &weight_avg[3],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444);
pixel_shift, chroma_idc);
}else if(IS_SUB_8X4(sub_mb_type)){
mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
mc_part(h, n , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444);
mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
pixel_shift, chroma_idc);
mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444);
pixel_shift, chroma_idc);
}else if(IS_SUB_4X8(sub_mb_type)){
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
mc_part(h, n , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444);
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
pixel_shift, chroma_idc);
mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444);
pixel_shift, chroma_idc);
}else{
int j;
assert(IS_SUB_4X4(sub_mb_type));
for(j=0; j<4; j++){
int sub_x_offset= x_offset + 2*(j&1);
int sub_y_offset= y_offset + (j&2);
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[6], &weight_avg[6],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444);
pixel_shift, chroma_idc);
}
}
}
}
prefetch_motion(h, 1, pixel_shift, chroma444);
prefetch_motion(h, 1, pixel_shift, chroma_idc);
}
/**
 * hl_motion() wrapper for 4:2:0 content.
 *
 * Every argument is handed through unchanged; the only thing this wrapper
 * contributes is the trailing chroma_idc argument of hl_motion(), which is
 * pinned to 1 (the value the callee's code paths label "yuv420").
 */
static av_always_inline void
hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
              qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
              qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
              h264_weight_func *weight_op, h264_biweight_func *weight_avg,
              int pixel_shift)
{
    /* Compile-time constant so the always-inlined callee can be specialised. */
    const int chroma_idc = 1; /* yuv420 */

    hl_motion(h, dest_y, dest_cb, dest_cr,
              qpix_put, chroma_put,
              qpix_avg, chroma_avg,
              weight_op, weight_avg,
              pixel_shift, chroma_idc);
}
/**
 * hl_motion() wrapper for 4:2:2 content.
 *
 * Every argument is handed through unchanged; the only thing this wrapper
 * contributes is the trailing chroma_idc argument of hl_motion(), which is
 * pinned to 2 (the value the callee's code paths label "yuv422").
 */
static av_always_inline void
hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
              qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
              qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
              h264_weight_func *weight_op, h264_biweight_func *weight_avg,
              int pixel_shift)
{
    /* Compile-time constant so the always-inlined callee can be specialised. */
    const int chroma_idc = 2; /* yuv422 */

    hl_motion(h, dest_y, dest_cb, dest_cr,
              qpix_put, chroma_put,
              qpix_avg, chroma_avg,
              weight_op, weight_avg,
              pixel_shift, chroma_idc);
}
static void free_tables(H264Context *h, int free_rbsp){
@ -1468,7 +1526,10 @@ static void decode_postinit(H264Context *h, int setup_finished){
ff_thread_finish_setup(s->avctx);
}
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
uint8_t *src_cb, uint8_t *src_cr,
int linesize, int uvlinesize, int simple)
{
MpegEncContext * const s = &h->s;
uint8_t *top_border;
int top_idx = 1;
@ -1813,7 +1874,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
}
}
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift)
{
MpegEncContext * const s = &h->s;
const int mb_x= s->mb_x;
const int mb_y= s->mb_y;
@ -1827,7 +1889,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
/* is_h264 should always be true if SVQ3 is disabled. */
const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
const int block_h = 16>>s->chroma_y_shift;
const int block_h = 16 >> s->chroma_y_shift;
const int chroma422 = CHROMA422;
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
@ -1844,8 +1907,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
block_offset = &h->block_offset[48];
if(mb_y&1){ //FIXME move out of this function?
dest_y -= s->linesize*15;
dest_cb-= s->uvlinesize*(block_h-1);
dest_cr-= s->uvlinesize*(block_h-1);
dest_cb-= s->uvlinesize * (block_h - 1);
dest_cr-= s->uvlinesize * (block_h - 1);
}
if(FRAME_MBAFF) {
int list;
@ -1884,7 +1947,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
}
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (!h->sps.chroma_format_idc) {
for (i = 0; i < 8; i++) {
for (i = 0; i < block_h; i++) {
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
for (j = 0; j < 8; j++) {
@ -1911,13 +1974,13 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (!h->sps.chroma_format_idc) {
for (i=0; i<8; i++) {
memset(dest_cb+ i*uvlinesize, 1 << (bit_depth - 1), 8);
memset(dest_cr+ i*uvlinesize, 1 << (bit_depth - 1), 8);
memset(dest_cb + i*uvlinesize, 1 << (bit_depth - 1), 8);
memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8);
}
} else {
for (i=0; i<block_h; i++) {
memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8);
memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8);
}
}
}
@ -1937,11 +2000,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
}else if(is_h264){
hl_motion(h, dest_y, dest_cb, dest_cr,
if (chroma422) {
hl_motion_422(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 0);
h->h264dsp.biweight_h264_pixels_tab,
pixel_shift);
} else {
hl_motion_420(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab,
pixel_shift);
}
}
hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
@ -1959,14 +2032,20 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
}
if (chroma422) {
for(i=j*16+4; i<j*16+8; i++){
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
idct_add (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize);
}
}
}
}
}else{
if(is_h264){
int qp[2];
if (CHROMA422) {
qp[0] = h->chroma_qp[0]+3;
qp[1] = h->chroma_qp[1]+3;
if (chroma422) {
qp[0] = h->chroma_qp[0] + 3;
qp[1] = h->chroma_qp[1] + 3;
} else {
qp[0] = h->chroma_qp[0];
qp[1] = h->chroma_qp[1];
@ -2086,7 +2165,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 1);
h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
}
for (p = 0; p < plane_count; p++)
@ -2690,6 +2769,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
case 9 :
if (CHROMA444)
s->avctx->pix_fmt = PIX_FMT_YUV444P9;
else if (CHROMA422)
s->avctx->pix_fmt = PIX_FMT_YUV422P9;
else
s->avctx->pix_fmt = PIX_FMT_YUV420P9;
break;
@ -2708,7 +2789,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
s->avctx->pix_fmt = PIX_FMT_GBR24P;
av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n");
}
}else if (CHROMA422) {
} else if (CHROMA422) {
s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
}else{
s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
@ -3384,7 +3465,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
const int end_mb_y= s->mb_y + FRAME_MBAFF;
const int old_slice_type= h->slice_type;
const int pixel_shift = h->pixel_shift;
const int block_h = 16>>s->chroma_y_shift;
const int block_h = 16 >> s->chroma_y_shift;
if(h->deblocking_filter) {
for(mb_x= start_x; mb_x<end_x; mb_x++){
@ -3401,8 +3482,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
s->mb_x= mb_x;
s->mb_y= mb_y;
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h;
dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h;
dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
//FIXME simplify above
if (MB_FIELD) {
@ -3410,8 +3491,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
if(mb_y&1){ //FIXME move out of this function?
dest_y -= s->linesize*15;
dest_cb-= s->uvlinesize*(block_h-1);
dest_cr-= s->uvlinesize*(block_h-1);
dest_cb-= s->uvlinesize * (block_h - 1);
dest_cr-= s->uvlinesize * (block_h - 1);
}
} else {
linesize = h->mb_linesize = s->linesize;

@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
static av_always_inline void
decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
const uint32_t *qmul, int max_coeff,
int is_dc, int chroma422)
{
static const int significant_coeff_flag_offset[2][14] = {
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
{ 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
@ -1593,7 +1598,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
* map node ctx => cabac ctx for level=1 */
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
/* map node ctx => cabac ctx for level>1 */
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
static const uint8_t coeff_abs_levelgt1_ctx[2][8] = {
{ 5, 5, 5, 5, 6, 7, 8, 9 },
{ 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case
};
static const uint8_t coeff_abs_level_transition[2][8] = {
/* update node ctx after decoding a level=1 */
{ 1, 2, 3, 3, 4, 5, 6, 7 },
@ -1652,7 +1660,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
last_coeff_ctx_base, sig_off);
} else {
if (is_dc && max_coeff == 8) { // dc 422
if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
@ -1661,7 +1669,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
#else
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
} else {
if (is_dc && max_coeff == 8) { // dc 422
if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else {
DECODE_SIGNIFICANCE(max_coeff - 1, last, last);
@ -1701,9 +1709,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} \
} else { \
int coeff_abs = 2; \
if (is_dc && max_coeff == 8) \
node_ctx = FFMIN(node_ctx, 6); \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \
\
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
@ -1745,11 +1751,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
}
static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1);
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0);
}
/**
 * Non-inline instantiation of decode_cabac_residual_internal() for the
 * "DC, chroma 4:2:2" combination.
 *
 * Forwards h, block, cat, n, scantable and max_coeff unchanged, passes no
 * dequantisation table (qmul is NULL) and sets both trailing flags of the
 * always-inline worker, is_dc and chroma422, to 1.
 */
static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block,
                                                  int cat, int n,
                                                  const uint8_t *scantable,
                                                  int max_coeff)
{
    decode_cabac_residual_internal(h, block, cat, n, scantable,
                                   NULL /* qmul */,
                                   max_coeff,
                                   1    /* is_dc */,
                                   1    /* chroma422 */);
}
static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0);
}
/* cat: 0-> DC 16x16 n = 0
@ -1773,6 +1786,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
}
/**
 * Decode one DC residual block through the 4:2:2-specific CABAC path.
 *
 * First reads the coded block flag, using the context index returned by
 * get_cabac_cbf_ctx(h, cat, n, max_coeff, 1). When the flag is 0 the
 * non-zero-count cache entry for block n is cleared and nothing else is
 * decoded; otherwise the coefficients are decoded by
 * decode_cabac_residual_dc_internal_422().
 */
static av_always_inline void
decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
                             int cat, int n, const uint8_t *scantable,
                             int max_coeff)
{
    const int cbf_ctx = get_cabac_cbf_ctx(h, cat, n, max_coeff, 1);

    /* coded block flag set: there are coefficients to read */
    if (get_cabac(&h->cabac, &h->cabac_state[cbf_ctx])) {
        decode_cabac_residual_dc_internal_422(h, block, cat, n,
                                              scantable, max_coeff);
        return;
    }

    /* coded block flag clear: mark the block as having no coefficients */
    h->non_zero_count_cache[scan8[n]] = 0;
}
static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* read coded block flag */
if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
@ -2325,17 +2351,14 @@ decode_intra_mb:
if(CHROMA444){
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
} else {
const int num_c8x8 = h->sps.chroma_format_idc;
} else if (CHROMA422) {
if( cbp&0x30 ){
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
CHROMA_DC_BLOCK_INDEX+c,
CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
4*num_c8x8);
decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
CHROMA_DC_BLOCK_INDEX + c,
chroma422_dc_scan, 8);
}
}
@ -2344,7 +2367,7 @@ decode_intra_mb:
for( c = 0; c < 2; c++ ) {
DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for (i8x8 = 0; i8x8 < num_c8x8; i8x8++) {
for (i8x8 = 0; i8x8 < 2; i8x8++) {
for (i = 0; i < 4; i++) {
const int index = 16 + 16 * c + 8*i8x8 + i;
//av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16);
@ -2357,6 +2380,29 @@ decode_intra_mb:
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
} else /* yuv420 */ {
if( cbp&0x30 ){
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
if( cbp&0x20 ) {
int c, i;
for( c = 0; c < 2; c++ ) {
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for( i = 0; i < 4; i++ ) {
const int index = 16 + 16 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
}
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);

@ -64,26 +64,14 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo
else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
\
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\
\
c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\

@ -31,16 +31,18 @@
#include "dsputil.h"
//typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset);
/**
* Context for storing H.264 DSP functions
*/
typedef struct H264DSPContext{
/* weighted MC */
h264_weight_func weight_h264_pixels_tab[10];
h264_biweight_func biweight_h264_pixels_tab[10];
h264_weight_func weight_h264_pixels_tab[4];
h264_biweight_func biweight_h264_pixels_tab[4];
/* loop filter */
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);

@ -29,14 +29,16 @@
#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int stride, int log2_denom, int weight, int offset){ \
#define H264_WEIGHT(W) \
static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \
int log2_denom, int weight, int offset) \
{ \
int y; \
pixel *block = (pixel*)p_block; \
pixel *block = (pixel*)_block; \
stride >>= sizeof(pixel)-1; \
offset <<= (log2_denom + (BIT_DEPTH-8)); \
if(log2_denom) offset += 1<<(log2_denom-1); \
for(y=0; y<H; y++, block += stride){ \
for (y = 0; y < height; y++, block += stride) { \
op_scale1(0); \
op_scale1(1); \
if(W==2) continue; \
@ -58,14 +60,16 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int strid
op_scale1(15); \
} \
} \
static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \
static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset) \
{ \
int y; \
pixel *dst = (pixel*)_dst; \
pixel *src = (pixel*)_src; \
stride >>= sizeof(pixel)-1; \
offset <<= (BIT_DEPTH-8); \
offset = ((offset + 1) | 1) << log2_denom; \
for(y=0; y<H; y++, dst += stride, src += stride){ \
for (y = 0; y < height; y++, dst += stride, src += stride) { \
op_scale2(0); \
op_scale2(1); \
if(W==2) continue; \
@ -88,16 +92,10 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_
} \
}
H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)
H264_WEIGHT(16)
H264_WEIGHT(8)
H264_WEIGHT(4)
H264_WEIGHT(2)
#undef op_scale1
#undef op_scale2

@ -228,16 +228,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
int i, j;
#if 0
av_log(NULL, AV_LOG_INFO, "idct\n");
int32_t *b = block;
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
for(j=1; j<3; j++){
for(i=j*16; i<j*16+4; i++){
if(nnzc[ scan8[i] ])
@ -296,13 +286,13 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in
#undef stride
}
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2;
const int xStride= 16;
int i;
int temp[8];
static const uint8_t x_offset[2]={0, 16};
dctcoef *block = (dctcoef*)p_block;
dctcoef *block = (dctcoef*)_block;
for(i=0; i<4; i++){
temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
@ -321,22 +311,13 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){
block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
}
#if 0
av_log(NULL, AV_LOG_INFO, "after chroma dc\n");
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", block[i]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
}
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2;
const int xStride= 16;
int a,b,c,d,e;
dctcoef *block = (dctcoef*)p_block;
dctcoef *block = (dctcoef*)_block;
a= block[stride*0 + xStride*0];
b= block[stride*0 + xStride*1];

@ -462,10 +462,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\
}\
}else{\
h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
@ -510,8 +510,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\
h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\
h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\
if (chroma_format_idc == 1) {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\
} else {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\
}\
h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\
h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\

@ -663,23 +663,45 @@ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
FUNCC(pred4x4_dc)(src, NULL, stride);
}
/* 8x16 variant of pred8x8_mad_cow_dc_l0t: runs pred8x16_top_dc on the block,
 * then runs pred4x4_dc at the same start address. The order matters, the
 * second call is applied after (and on top of) the whole-block call. */
static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){
FUNCC(pred8x16_top_dc)(src, stride);
FUNCC(pred4x4_dc)(src, NULL, stride);
}
/* Runs pred8x8_dc on the block, then runs pred4x4_top_dc at the same start
 * address. The order matters, the second call is applied after (and on top
 * of) the whole-block call. */
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
FUNCC(pred8x8_dc)(src, stride);
FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
/* 8x16 variant of pred8x8_mad_cow_dc_0lt: runs pred8x16_dc on the block,
 * then runs pred4x4_top_dc at the same start address. The order matters,
 * the second call is applied after (and on top of) the whole-block call. */
static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){
FUNCC(pred8x16_dc)(src, stride);
FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
/* Runs pred8x8_left_dc on the block, then applies pred4x4_128_dc at two
 * positions starting 4 rows down: byte column offset 0 and byte column
 * offset 4*sizeof(pixel). The whole-block call has to come first because
 * the two later calls re-predict part of the same area. */
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}
/* 8x16 variant of pred8x8_mad_cow_dc_l00: runs pred8x16_left_dc on the
 * block, then applies pred4x4_128_dc at two positions starting 4 rows down
 * (byte column offsets 0 and 4*sizeof(pixel)).
 * NOTE(review): only the positions at row offset 4*stride are re-predicted,
 * exactly as in the 8x8 version; assuming pred4x4_128_dc covers a 4x4 area,
 * rows 8..15 keep the pred8x16_left_dc result -- confirm that is intended
 * for the taller block. */
static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){
FUNCC(pred8x16_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}
/* Runs pred8x8_left_dc on the block, then applies pred4x4_128_dc at the
 * block's first row twice: at byte column offset 0 and at byte column
 * offset 4*sizeof(pixel). The whole-block call has to come first because
 * the two later calls re-predict part of the same area. */
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}
/* 8x16 variant of pred8x8_mad_cow_dc_0l0: runs pred8x16_left_dc on the
 * block, then applies pred4x4_128_dc at the block's first row twice (byte
 * column offsets 0 and 4*sizeof(pixel)). The whole-block call has to come
 * first because the two later calls re-predict part of the same area. */
static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){
FUNCC(pred8x16_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}
static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
int j, k;
int a;
@ -1126,8 +1148,24 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, c
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}
static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
int i;
for(i=0; i<4; i++)
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
for(i=4; i<8; i++)
FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
}
/**
 * Apply pred4x4_horizontal_add to the four 4x4 sub-blocks of an 8x8 block.
 *
 * Sub-block k (0..3) is written at pix + block_offset[k] and takes its
 * coefficients from block + k*16*sizeof(pixel).
 */
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
                                          const DCTELEM *block, int stride)
{
    const int *off           = block_offset;
    const int *const off_end = block_offset + 4;

    /* walk the offset table and the coefficient pointer in lock-step */
    for (; off < off_end; off++, block += 16*sizeof(pixel))
        FUNCC(pred4x4_horizontal_add)(pix + *off, block, stride);
}
static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
int i;
for(i=0; i<4; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
for(i=4; i<8; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
}

@ -18,11 +18,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include <speex/speex.h>
#include <speex/speex_header.h>
#include <speex/speex_stereo.h>
#include <speex/speex_callbacks.h>
#include "avcodec.h"
typedef struct {
SpeexBits bits;
@ -60,14 +60,14 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx)
mode = speex_lib_get_mode(s->header->mode);
if (!mode) {
av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode);
return -1;
return AVERROR_INVALIDDATA;
}
} else
av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n");
if (avctx->channels > 2) {
av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n");
return -1;
return AVERROR(EINVAL);
}
speex_bits_init(&s->bits);
@ -99,32 +99,42 @@ static int libspeex_decode_frame(AVCodecContext *avctx,
uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
LibSpeexContext *s = avctx->priv_data;
int16_t *output = data, *end;
int i, num_samples;
num_samples = s->frame_size * avctx->channels;
end = output + *data_size / sizeof(*output);
int16_t *output = data;
int out_size, ret, consumed = 0;
/* check output buffer size */
out_size = s->frame_size * avctx->channels *
av_get_bytes_per_sample(avctx->sample_fmt);
if (*data_size < out_size) {
av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
return AVERROR(EINVAL);
}
/* if there is not enough data left for the smallest possible frame,
reset the libspeex buffer using the current packet, otherwise ignore
the current packet and keep decoding frames from the libspeex buffer. */
if (speex_bits_remaining(&s->bits) < 43) {
/* check for flush packet */
if (!buf || !buf_size) {
*data_size = 0;
return buf_size;
}
/* set new buffer */
speex_bits_read_from(&s->bits, buf, buf_size);
consumed = buf_size;
}
for (i = 0; speex_bits_remaining(&s->bits) && output + num_samples < end; i++) {
int ret = speex_decode_int(s->dec_state, &s->bits, output);
/* decode a single frame */
ret = speex_decode_int(s->dec_state, &s->bits, output);
if (ret <= -2) {
av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n");
return -1;
} else if (ret == -1)
// end of stream
break;
return AVERROR_INVALIDDATA;
}
if (avctx->channels == 2)
speex_decode_stereo_int(output, s->frame_size, &s->stereo);
output += num_samples;
}
avctx->frame_size = s->frame_size * i;
*data_size = avctx->channels * avctx->frame_size * sizeof(*output);
return buf_size;
*data_size = out_size;
return consumed;
}
static av_cold int libspeex_decode_close(AVCodecContext *avctx)
@ -138,6 +148,12 @@ static av_cold int libspeex_decode_close(AVCodecContext *avctx)
return 0;
}
/**
 * Flush callback of the libspeex decoder.
 *
 * Calls speex_bits_reset() on the decoder's SpeexBits buffer. No other
 * field of the private context is touched.
 */
static av_cold void libspeex_decode_flush(AVCodecContext *avctx)
{
    LibSpeexContext *ctx = avctx->priv_data;

    speex_bits_reset(&ctx->bits);
}
AVCodec ff_libspeex_decoder = {
.name = "libspeex",
.type = AVMEDIA_TYPE_AUDIO,
@ -146,5 +162,7 @@ AVCodec ff_libspeex_decoder = {
.init = libspeex_decode_init,
.close = libspeex_decode_close,
.decode = libspeex_decode_frame,
.flush = libspeex_decode_flush,
.capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY,
.long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
};

@ -1893,24 +1893,50 @@ typedef struct MP3On4DecodeContext {
int syncword; ///< syncword patch
const uint8_t *coff; ///< channels offsets in output buffer
MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
OUT_INT *decoded_buf; ///< output buffer for decoded samples
} MP3On4DecodeContext;
#include "mpeg4audio.h"
/* Next 3 arrays are indexed by channel config number (passed via codecdata) */
static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */
/* offsets into output buffer, assume output order is FL FR BL BR C LFE */
/* offsets into output buffer, assume output order is FL FR C LFE BL BR SL SR */
static const uint8_t chan_offset[8][5] = {
{0},
{0}, // C
{0}, // FLR
{2,0}, // C FLR
{2,0,3}, // C FLR BS
{4,0,2}, // C FLR BLRS
{4,0,2,5}, // C FLR BLRS LFE
{4,0,2,6,5}, // C FLR BLRS BLR LFE
{2,0,3}, // C FLR BLRS
{2,0,4,3}, // C FLR BLRS LFE
{2,0,6,4,3}, // C FLR BLRS BLR LFE
};
/* mp3on4 channel layouts, indexed by channel config number:
 * decode_init_mp3on4() stores chan_layout[cfg.chan_config] in
 * avctx->channel_layout. Entry 0 is 0, so config 0 gets no layout mask.
 * NOTE(review): the element type is int16_t, so every AV_CH_LAYOUT_* value
 * listed here has to fit in 16 bits -- confirm against the channel layout
 * definitions before adding wider layouts. */
static const int16_t chan_layout[8] = {
0,
AV_CH_LAYOUT_MONO,
AV_CH_LAYOUT_STEREO,
AV_CH_LAYOUT_SURROUND,
AV_CH_LAYOUT_4POINT0,
AV_CH_LAYOUT_5POINT0,
AV_CH_LAYOUT_5POINT1,
AV_CH_LAYOUT_7POINT1
};
/**
 * Release everything owned by the MP3onMP4 decoder context.
 *
 * Frees the first s->frames per-stream MPADecodeContext instances and the
 * interleaving buffer. Every pointer is cleared as it is freed: this
 * function is also the error-cleanup path of decode_init_mp3on4(), so it
 * can run on a partially initialised context, and a repeated call on the
 * same context must not free a block twice.
 *
 * @param avctx codec context whose priv_data is an MP3On4DecodeContext
 * @return always 0
 */
static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    int i;

    /* av_freep() instead of av_free() so no dangling pointer is left behind */
    for (i = 0; i < s->frames; i++)
        av_freep(&s->mp3decctx[i]);

    av_freep(&s->decoded_buf);

    return 0;
}
static int decode_init_mp3on4(AVCodecContext * avctx)
{
@ -1931,6 +1957,7 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
s->frames = mp3Frames[cfg.chan_config];
s->coff = chan_offset[cfg.chan_config];
avctx->channels = ff_mpeg4audio_channels[cfg.chan_config];
avctx->channel_layout = chan_layout[cfg.chan_config];
if (cfg.sample_rate < 16000)
s->syncword = 0xffe00000;
@ -1944,6 +1971,8 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/
// Allocate zeroed memory for the first decoder context
s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[0])
goto alloc_fail;
// Put decoder context in place to make init_decode() happy
avctx->priv_data = s->mp3decctx[0];
decode_init(avctx);
@ -1956,23 +1985,38 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/
for (i = 1; i < s->frames; i++) {
s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[i])
goto alloc_fail;
s->mp3decctx[i]->adu_mode = 1;
s->mp3decctx[i]->avctx = avctx;
s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp;
}
/* Allocate buffer for multi-channel output if needed */
if (s->frames > 1) {
s->decoded_buf = av_malloc(MPA_FRAME_SIZE * MPA_MAX_CHANNELS *
sizeof(*s->decoded_buf));
if (!s->decoded_buf)
goto alloc_fail;
}
return 0;
alloc_fail:
decode_close_mp3on4(avctx);
return AVERROR(ENOMEM);
}
static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
static void flush_mp3on4(AVCodecContext *avctx)
{
MP3On4DecodeContext *s = avctx->priv_data;
int i;
MP3On4DecodeContext *s = avctx->priv_data;
for (i = 0; i < s->frames; i++)
av_free(s->mp3decctx[i]);
return 0;
for (i = 0; i < s->frames; i++) {
MPADecodeContext *m = s->mp3decctx[i];
memset(m->synth_buf, 0, sizeof(m->synth_buf));
m->last_buf_size = 0;
}
}
@ -1987,12 +2031,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
int fsize, len = buf_size, out_size = 0;
uint32_t header;
OUT_INT *out_samples = data;
OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS];
OUT_INT *outptr, *bp;
int fr, j, n;
int fr, j, n, ch;
if(*data_size < MPA_FRAME_SIZE * MPA_MAX_CHANNELS * s->frames * sizeof(OUT_INT))
return -1;
if (*data_size < MPA_FRAME_SIZE * avctx->channels * sizeof(OUT_INT)) {
av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
return AVERROR(EINVAL);
}
*data_size = 0;
// Discard too short frames
@ -2000,10 +2045,11 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
return -1;
// If only one decoder interleave is not needed
outptr = s->frames == 1 ? out_samples : decoded_buf;
outptr = s->frames == 1 ? out_samples : s->decoded_buf;
avctx->bit_rate = 0;
ch = 0;
for (fr = 0; fr < s->frames; fr++) {
fsize = AV_RB16(buf) >> 4;
fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE);
@ -2016,6 +2062,14 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
break;
avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
if (ch + m->nb_channels > avctx->channels) {
av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec "
"channel count\n");
return AVERROR_INVALIDDATA;
}
ch += m->nb_channels;
out_size += mp_decode_frame(m, outptr, buf, fsize);
buf += fsize;
len -= fsize;
@ -2026,13 +2080,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
bp = out_samples + s->coff[fr];
if(m->nb_channels == 1) {
for(j = 0; j < n; j++) {
*bp = decoded_buf[j];
*bp = s->decoded_buf[j];
bp += avctx->channels;
}
} else {
for(j = 0; j < n; j++) {
bp[0] = decoded_buf[j++];
bp[1] = decoded_buf[j];
bp[0] = s->decoded_buf[j++];
bp[1] = s->decoded_buf[j];
bp += avctx->channels;
}
}
@ -2110,7 +2164,7 @@ AVCodec ff_mp3on4_decoder = {
.init = decode_init_mp3on4,
.close = decode_close_mp3on4,
.decode = decode_frame_mp3on4,
.flush = flush,
.flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
};
#endif

@ -83,7 +83,7 @@ AVCodec ff_mp3on4float_decoder = {
.init = decode_init_mp3on4,
.close = decode_close_mp3on4,
.decode = decode_frame_mp3on4,
.flush = flush,
.flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
};
#endif

@ -843,7 +843,8 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
}
static av_always_inline
void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
void weight_h264_W_altivec(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset, int w)
{
int y, aligned;
vec_u8 vblock;
@ -864,7 +865,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
voffset = vec_splat(vtemp, 5);
aligned = !((unsigned long)block & 0xf);
for (y=0; y<h; y++) {
for (y = 0; y < height; y++) {
vblock = vec_ld(0, block);
v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
@ -888,8 +889,8 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
}
static av_always_inline
void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
int weightd, int weights, int offset, int w, int h)
void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset, int w)
{
int y, dst_aligned, src_aligned;
vec_u8 vsrc, vdst;
@ -912,7 +913,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
dst_aligned = !((unsigned long)dst & 0xf);
src_aligned = !((unsigned long)src & 0xf);
for (y=0; y<h; y++) {
for (y = 0; y < height; y++) {
vdst = vec_ld(0, dst);
vsrc = vec_ld(0, src);
@ -952,19 +953,18 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
}
}
#define H264_WEIGHT(W,H) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
#define H264_WEIGHT(W) \
static void ff_weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \
int log2_denom, int weight, int offset){ \
weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
}\
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
static void ff_biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
}
H264_WEIGHT(16,16)
H264_WEIGHT(16, 8)
H264_WEIGHT( 8,16)
H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
H264_WEIGHT(16)
H264_WEIGHT( 8)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
@ -1015,16 +1015,10 @@ void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_altivec;
}
}
}

@ -158,6 +158,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int l
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV444P9LE:

@ -41,24 +41,57 @@ static void free_buffers(VP8Context *s)
av_freep(&s->top_nnz);
av_freep(&s->edge_emu_buffer);
av_freep(&s->top_border);
av_freep(&s->segmentation_map);
s->macroblocks = NULL;
}
static void vp8_decode_flush(AVCodecContext *avctx)
/**
 * Obtain a picture buffer for @p f and attach a segmentation map to it.
 *
 * The map is taken from the queue of maps pending release when that queue
 * is non-empty and has not been marked invalid; otherwise a fresh zeroed
 * buffer of mb_width * mb_height bytes is allocated.
 *
 * @return 0 on success, the negative error from ff_thread_get_buffer(), or
 *         AVERROR(ENOMEM) if the map allocation fails (the picture buffer
 *         is released again in that case).
 */
static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int err = ff_thread_get_buffer(s->avctx, f);

    if (err < 0)
        return err;

    /* Recycle a queued map when one is available and still valid. */
    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
        s->num_maps_to_be_freed--;
        f->ref_index[0] = s->segmentation_maps[s->num_maps_to_be_freed];
        return 0;
    }

    f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height);
    if (!f->ref_index[0]) {
        /* Do not leave a picture buffer without its map behind. */
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }

    return 0;
}
/**
 * Release the picture buffer of @p f and dispose of its segmentation map.
 *
 * When @p is_close is non-zero the map is freed immediately. Otherwise a
 * non-NULL map is appended to s->segmentation_maps so that it can be
 * recycled or freed later, and the frame's pointer to it is cleared.
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int is_close)
{
    if (is_close) {
        av_freep(&f->ref_index[0]);
    } else if (f->ref_index[0]) {
        /* The pending queue has a fixed capacity; overflowing it is a bug. */
        assert(s->num_maps_to_be_freed < FF_ARRAY_ELEMS(s->segmentation_maps));
        s->segmentation_maps[s->num_maps_to_be_freed] = f->ref_index[0];
        s->num_maps_to_be_freed++;
        f->ref_index[0] = NULL;
    }

    ff_thread_release_buffer(s->avctx, f);
}
static void vp8_decode_flush_impl(AVCodecContext *avctx, int force, int is_close)
{
VP8Context *s = avctx->priv_data;
int i;
if (!avctx->is_copy) {
if (!avctx->is_copy || force) {
for (i = 0; i < 5; i++)
if (s->frames[i].data[0])
ff_thread_release_buffer(avctx, &s->frames[i]);
vp8_release_frame(s, &s->frames[i], is_close);
}
memset(s->framep, 0, sizeof(s->framep));
free_buffers(s);
s->maps_are_invalid = 1;
}
/**
 * Flush entry point: runs the shared flush implementation with both the
 * "force" and "is_close" flags cleared.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int force    = 0;
    const int is_close = 0;

    vp8_decode_flush_impl(avctx, force, is_close);
}
static int update_dimensions(VP8Context *s, int width, int height)
@ -68,7 +101,7 @@ static int update_dimensions(VP8Context *s, int width, int height)
if (av_image_check_size(width, height, 0, s->avctx))
return AVERROR_INVALIDDATA;
vp8_decode_flush(s->avctx);
vp8_decode_flush_impl(s->avctx, 1, 0);
avcodec_set_dimensions(s->avctx, width, height);
}
@ -81,10 +114,9 @@ static int update_dimensions(VP8Context *s, int width, int height)
s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
!s->top_nnz || !s->top_border || !s->segmentation_map)
!s->top_nnz || !s->top_border)
return AVERROR(ENOMEM);
s->macroblocks = s->macroblocks_base + 1;
@ -1508,6 +1540,14 @@ static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
}
}
/**
 * Free segmentation maps that were queued for deferred release.
 *
 * One queued map is left in place when @p is_close is zero and the maps
 * have not been marked invalid; in every other case the queue is emptied.
 * The maps_are_invalid flag is cleared afterwards.
 */
static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int keep = 0;

    if (!is_close && !s->maps_are_invalid)
        keep = 1;

    while (s->num_maps_to_be_freed > keep) {
        s->num_maps_to_be_freed--;
        av_freep(&s->segmentation_maps[s->num_maps_to_be_freed]);
    }

    s->maps_are_invalid = 0;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt)
{
@ -1516,6 +1556,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
enum AVDiscard skip_thresh;
AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
release_queued_segmaps(s, 0);
if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
return ret;
@ -1538,7 +1580,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
ff_thread_release_buffer(avctx, &s->frames[i]);
vp8_release_frame(s, &s->frames[i], 0);
// find a free buffer
for (i = 0; i < 5; i++)
@ -1559,8 +1601,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0;
curframe->ref_index[0] = s->segmentation_map;
if ((ret = ff_thread_get_buffer(avctx, curframe))) {
if ((ret = vp8_alloc_frame(s, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
return ret;
}
@ -1652,8 +1693,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
@ -1736,7 +1777,8 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
vp8_decode_flush(avctx);
vp8_decode_flush_impl(avctx, 0, 1);
release_queued_segmaps(avctx->priv_data, 1);
return 0;
}

@ -130,7 +130,6 @@ typedef struct {
uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4];
uint8_t *segmentation_map;
/**
* Macroblocks can have one of 4 different quants in a frame when
@ -237,6 +236,16 @@ typedef struct {
H264PredContext hpc;
vp8_mc_func put_pixels_tab[3][3][3];
AVFrame frames[5];
/**
* A list of segmentation_map buffers that are to be free()'ed in
* the next decoding iteration. We can't free() them right away
* because the map may still be used by subsequent decoding threads.
* Unused if frame threading is off.
*/
uint8_t *segmentation_maps[5];
int num_maps_to_be_freed;
int maps_are_invalid;
} VP8Context;
#endif /* AVCODEC_VP8_H */

@ -1055,14 +1055,6 @@ emu_edge mmx
; int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro VECTOR_CLIP_INT32 4
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%ifidn %1, sse2

@ -24,6 +24,146 @@
SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
; Emits int32_to_float_fmul_scalar_<%1>: dst[i] = (float)src[i] * mul.
;   %1  name suffix; "sse2" selects the cvtdq2ps path, any other value the
;       cvtpi2ps path
;   %2  forwarded to cglobal as its fourth argument
; Every iteration handles 32 bytes (8 values) and the loop body runs before
; the counter is tested, so len is presumably expected to be a positive
; multiple of 8 - TODO confirm against callers.
%macro INT32_TO_FLOAT_FMUL_SCALAR 2
%ifdef ARCH_X86_64
; mul is never loaded on this path, so it is presumably already in m0 on entry.
; NOTE(review): that matches the SysV ABI; on Win64 a float in the third
; argument slot arrives in xmm2 and len would be the fourth argument -
; confirm this path is correct there.
cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
%else
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
; load the scalar multiplier from its argument slot
movss m0, mulm
%endif
; SPLATD is %define'd by the caller before instantiation; presumably it
; broadcasts the low dword of m0 to all lanes
SPLATD m0
; NOTE(review): len is a C int but the full-width lenq is used from here on;
; confirm the upper half of the register is well defined on 64-bit targets.
; len elements -> len*4 bytes
shl lenq, 2
; point src and dst at the end of their arrays ...
add srcq, lenq
add dstq, lenq
; ... and walk them with a negative byte offset that counts up towards zero
neg lenq
.loop:
%ifidn %1, sse2
; four int32 -> four floats per instruction
cvtdq2ps m1, [srcq+lenq ]
cvtdq2ps m2, [srcq+lenq+16]
%else
; cvtpi2ps converts only two int32 into the low half of its destination,
; so each 4-float vector is built from two conversions joined by movlhps
cvtpi2ps m1, [srcq+lenq ]
cvtpi2ps m3, [srcq+lenq+ 8]
cvtpi2ps m2, [srcq+lenq+16]
cvtpi2ps m4, [srcq+lenq+24]
movlhps m1, m3
movlhps m2, m4
%endif
; scale by the broadcast multiplier and store 8 floats
mulps m1, m0
mulps m2, m0
mova [dstq+lenq ], m1
mova [dstq+lenq+16], m2
; advance by 32 bytes; keep looping while the offset is still negative
add lenq, 32
jl .loop
REP_RET
%endmacro
INIT_XMM
%define SPLATD SPLATD_SSE
%define movdqa movaps
INT32_TO_FLOAT_FMUL_SCALAR sse, 5
%undef movdqa
%define SPLATD SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
%undef SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
; Emits float_to_int16_<%1>: converts len floats at src to int16 at dst,
; narrowing with signed saturation (packssdw).
;   %1  name suffix; "sse2" selects the XMM path, any other value the
;       MMX-register path
;   %2  forwarded to cglobal as its fourth argument
; Every iteration writes 16 bytes (8 samples) and the loop body runs before
; the counter is tested, so len is presumably expected to be a positive
; multiple of 8 - TODO confirm against callers.
%macro FLOAT_TO_INT16 2
cglobal float_to_int16_%1, 3,3,%2, dst, src, len
; lenq = 2*len = size of the output in bytes
add lenq, lenq
; srcq = src + 4*len bytes, i.e. the end of the input
lea srcq, [srcq+2*lenq]
; dstq = end of the output
add dstq, lenq
; negative byte offset into dst; src is indexed with twice that offset
neg lenq
.loop:
%ifidn %1, sse2
; 2 x 4 floats -> int32, then pack to 8 int16
cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1
mova [dstq+lenq], m0
%else
; 4 x 2 floats -> int32 in MMX registers, then pack to 2 x 4 int16
cvtps2pi m0, [srcq+2*lenq ]
cvtps2pi m1, [srcq+2*lenq+ 8]
cvtps2pi m2, [srcq+2*lenq+16]
cvtps2pi m3, [srcq+2*lenq+24]
packssdw m0, m1
packssdw m2, m3
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m2
%endif
; advance by 16 output bytes; keep looping while the offset is negative
add lenq, 16
js .loop
%ifnidn %1, sse2
; the non-sse2 variants used MMX registers: clear MMX state before returning
emms
%endif
REP_RET
%endmacro
INIT_XMM
FLOAT_TO_INT16 sse2, 2
INIT_MMX
FLOAT_TO_INT16 sse, 0
%define cvtps2pi pf2id
FLOAT_TO_INT16 3dnow, 0
%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
; Emits float_to_int16_interleave2_<%1>: converts the two float planes
; src[0] and src[1] to int16 (signed saturation via packssdw) and writes
; them to dst interleaved as a0 b0 a1 b1 ...
;   %1  name suffix; "sse2" selects the XMM path, any other value the
;       MMX-register path
; Every iteration writes 16 bytes (4 sample pairs) and the loop body runs
; before the counter is tested, so len is presumably expected to be a
; positive multiple of 4 - TODO confirm against callers.
%macro FLOAT_TO_INT16_INTERLEAVE2 1
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
; Going by the cglobal name list, the incoming third argument (len) sits in
; r2, the register that is also named src1. Compute the byte count
; 4*len (2 channels * 2 bytes * len) into lenq before src1 is overwritten.
lea lenq, [4*r2q]
; src0 initially holds the pointer array: fetch src[1], then src[0]
mov src1q, [src0q+gprsize]
mov src0q, [src0q]
; point all three buffers at their ends and index with a negative offset
add dstq, lenq
add src0q, lenq
add src1q, lenq
neg lenq
.loop:
%ifidn %1, sse2
; m0 = 4 samples of channel 0, m1 = 4 samples of channel 1 (as int32)
cvtps2dq m0, [src0q+lenq]
cvtps2dq m1, [src1q+lenq]
; m0 = a0 a1 a2 a3 b0 b1 b2 b3 (int16)
packssdw m0, m1
; move the channel-1 half into the low half of m1, then interleave words
movhlps m1, m0
punpcklwd m0, m1
mova [dstq+lenq], m0
%else
; two conversions per channel, two int32 each
cvtps2pi m0, [src0q+lenq ]
cvtps2pi m1, [src0q+lenq+8]
cvtps2pi m2, [src1q+lenq ]
cvtps2pi m3, [src1q+lenq+8]
; m0 = a0..a3, m2 = b0..b3 (int16)
packssdw m0, m1
packssdw m2, m3
; interleave low and high word pairs of the two channels
mova m1, m0
punpcklwd m0, m2
punpckhwd m1, m2
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m1
%endif
; advance by 16 output bytes; keep looping while the offset is negative
add lenq, 16
js .loop
%ifnidn %1, sse2
; the non-sse2 variants used MMX registers: clear MMX state before returning
emms
%endif
REP_RET
%endmacro
INIT_MMX
%define cvtps2pi pf2id
FLOAT_TO_INT16_INTERLEAVE2 3dnow
%undef cvtps2pi
%define movdqa movaps
FLOAT_TO_INT16_INTERLEAVE2 sse
%undef movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2 sse2
; PSWAPD_SSE dst, src
; pshufw with selector 0x4e picks source words 2,3,0,1, i.e. it writes src
; to dst with its two 32-bit halves exchanged.
%macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e
%endmacro

@ -26,133 +26,32 @@
#include "libavutil/x86_cpu.h"
#include "libavcodec/fmtconvert.h"
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
{
x86_reg i = -4*len;
__asm__ volatile(
"movss %3, %%xmm4 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"1: \n"
"cvtpi2ps (%2,%0), %%xmm0 \n"
"cvtpi2ps 8(%2,%0), %%xmm1 \n"
"cvtpi2ps 16(%2,%0), %%xmm2 \n"
"cvtpi2ps 24(%2,%0), %%xmm3 \n"
"movlhps %%xmm1, %%xmm0 \n"
"movlhps %%xmm3, %%xmm2 \n"
"mulps %%xmm4, %%xmm0 \n"
"mulps %%xmm4, %%xmm2 \n"
"movaps %%xmm0, (%1,%0) \n"
"movaps %%xmm2, 16(%1,%0) \n"
"add $32, %0 \n"
"jl 1b \n"
:"+r"(i)
:"r"(dst+len), "r"(src+len), "m"(mul)
);
}
static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len)
{
x86_reg i = -4*len;
__asm__ volatile(
"movss %3, %%xmm4 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"1: \n"
"cvtdq2ps (%2,%0), %%xmm0 \n"
"cvtdq2ps 16(%2,%0), %%xmm1 \n"
"mulps %%xmm4, %%xmm0 \n"
"mulps %%xmm4, %%xmm1 \n"
"movaps %%xmm0, (%1,%0) \n"
"movaps %%xmm1, 16(%1,%0) \n"
"add $32, %0 \n"
"jl 1b \n"
:"+r"(i)
:"r"(dst+len), "r"(src+len), "m"(mul)
);
}
#if HAVE_YASM
static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
x86_reg reglen = len;
// not bit-exact: pf2id uses different rounding than C and SSE
__asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
"neg %0 \n\t"
"1: \n\t"
"pf2id (%2,%0,2) , %%mm0 \n\t"
"pf2id 8(%2,%0,2) , %%mm1 \n\t"
"pf2id 16(%2,%0,2) , %%mm2 \n\t"
"pf2id 24(%2,%0,2) , %%mm3 \n\t"
"packssdw %%mm1 , %%mm0 \n\t"
"packssdw %%mm3 , %%mm2 \n\t"
"movq %%mm0 , (%1,%0) \n\t"
"movq %%mm2 , 8(%1,%0) \n\t"
"add $16 , %0 \n\t"
" js 1b \n\t"
"femms \n\t"
:"+r"(reglen), "+r"(dst), "+r"(src)
);
}
void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len);
void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
static void float_to_int16_sse(int16_t *dst, const float *src, long len){
x86_reg reglen = len;
__asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
"neg %0 \n\t"
"1: \n\t"
"cvtps2pi (%2,%0,2) , %%mm0 \n\t"
"cvtps2pi 8(%2,%0,2) , %%mm1 \n\t"
"cvtps2pi 16(%2,%0,2) , %%mm2 \n\t"
"cvtps2pi 24(%2,%0,2) , %%mm3 \n\t"
"packssdw %%mm1 , %%mm0 \n\t"
"packssdw %%mm3 , %%mm2 \n\t"
"movq %%mm0 , (%1,%0) \n\t"
"movq %%mm2 , 8(%1,%0) \n\t"
"add $16 , %0 \n\t"
" js 1b \n\t"
"emms \n\t"
:"+r"(reglen), "+r"(dst), "+r"(src)
);
}
void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
x86_reg reglen = len;
__asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
"neg %0 \n\t"
"1: \n\t"
"cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
"cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
"packssdw %%xmm1 , %%xmm0 \n\t"
"movdqa %%xmm0 , (%1,%0) \n\t"
"add $16 , %0 \n\t"
" js 1b \n\t"
:"+r"(reglen), "+r"(dst), "+r"(src)
);
}
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
#if !HAVE_YASM
#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#endif
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
int i,j,c;\
for(c=0; c<channels; c++){\
float_to_int16_##cpu(tmp, src[c], len);\
ff_float_to_int16_##cpu(tmp, src[c], len);\
for(i=0, j=c; i<len; i++, j+=channels)\
dst[j] = tmp[i];\
}\
@ -160,73 +59,18 @@ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
\
static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
if(channels==1)\
float_to_int16_##cpu(dst, src[0], len);\
ff_float_to_int16_##cpu(dst, src[0], len);\
else if(channels==2){\
x86_reg reglen = len; \
const float *src0 = src[0];\
const float *src1 = src[1];\
__asm__ volatile(\
"shl $2, %0 \n"\
"add %0, %1 \n"\
"add %0, %2 \n"\
"add %0, %3 \n"\
"neg %0 \n"\
body\
:"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
);\
ff_float_to_int16_interleave2_##cpu(dst, src, len);\
}else if(channels==6){\
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
}else\
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
}
FLOAT_TO_INT16_INTERLEAVE(3dnow,
"1: \n"
"pf2id (%2,%0), %%mm0 \n"
"pf2id 8(%2,%0), %%mm1 \n"
"pf2id (%3,%0), %%mm2 \n"
"pf2id 8(%3,%0), %%mm3 \n"
"packssdw %%mm1, %%mm0 \n"
"packssdw %%mm3, %%mm2 \n"
"movq %%mm0, %%mm1 \n"
"punpcklwd %%mm2, %%mm0 \n"
"punpckhwd %%mm2, %%mm1 \n"
"movq %%mm0, (%1,%0)\n"
"movq %%mm1, 8(%1,%0)\n"
"add $16, %0 \n"
"js 1b \n"
"femms \n"
)
FLOAT_TO_INT16_INTERLEAVE(sse,
"1: \n"
"cvtps2pi (%2,%0), %%mm0 \n"
"cvtps2pi 8(%2,%0), %%mm1 \n"
"cvtps2pi (%3,%0), %%mm2 \n"
"cvtps2pi 8(%3,%0), %%mm3 \n"
"packssdw %%mm1, %%mm0 \n"
"packssdw %%mm3, %%mm2 \n"
"movq %%mm0, %%mm1 \n"
"punpcklwd %%mm2, %%mm0 \n"
"punpckhwd %%mm2, %%mm1 \n"
"movq %%mm0, (%1,%0)\n"
"movq %%mm1, 8(%1,%0)\n"
"add $16, %0 \n"
"js 1b \n"
"emms \n"
)
FLOAT_TO_INT16_INTERLEAVE(sse2,
"1: \n"
"cvtps2dq (%2,%0), %%xmm0 \n"
"cvtps2dq (%3,%0), %%xmm1 \n"
"packssdw %%xmm1, %%xmm0 \n"
"movhlps %%xmm0, %%xmm1 \n"
"punpcklwd %%xmm1, %%xmm0 \n"
"movdqa %%xmm0, (%1,%0) \n"
"add $16, %0 \n"
"js 1b \n"
)
FLOAT_TO_INT16_INTERLEAVE(3dnow)
FLOAT_TO_INT16_INTERLEAVE(sse)
FLOAT_TO_INT16_INTERLEAVE(sse2)
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
if(channels==6)
@ -235,7 +79,6 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
float_to_int16_interleave_3dnow(dst, src, len, channels);
}
#if HAVE_YASM
void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
@ -269,34 +112,32 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) {
#if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
c->float_interleave = float_interleave_mmx;
#endif
if(mm_flags & AV_CPU_FLAG_3DNOW){
if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16 = float_to_int16_3dnow;
c->float_to_int16 = ff_float_to_int16_3dnow;
c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
}
}
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
}
}
if(mm_flags & AV_CPU_FLAG_SSE){
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
c->float_to_int16 = float_to_int16_sse;
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
c->float_to_int16 = ff_float_to_int16_sse;
c->float_to_int16_interleave = float_to_int16_interleave_sse;
#if HAVE_YASM
c->float_interleave = float_interleave_sse;
#endif
}
if(mm_flags & AV_CPU_FLAG_SSE2){
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
c->float_to_int16 = float_to_int16_sse2;
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) {
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
c->float_to_int16 = ff_float_to_int16_sse2;
c->float_to_int16_interleave = float_to_int16_interleave_sse2;
}
}
#endif
}

@ -28,21 +28,20 @@ SECTION .text
;-----------------------------------------------------------------------------
; biweight pred:
;
; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int log2_denom, int weightd, int weights,
; int offset);
; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int height, int log2_denom, int weightd,
; int weights, int offset);
; and
; void h264_weight_16x16_sse2(uint8_t *dst, int stride,
; int log2_denom, int weight,
; int offset);
; void h264_weight_16_sse2(uint8_t *dst, int stride, int height,
; int log2_denom, int weight, int offset);
;-----------------------------------------------------------------------------
%macro WEIGHT_SETUP 0
add r4, r4
inc r4
movd m3, r3d
movd m5, r4d
movd m6, r2d
add r5, r5
inc r5
movd m3, r4d
movd m5, r5d
movd m6, r3d
pslld m5, m6
psrld m5, 1
%if mmsize == 16
@ -71,60 +70,41 @@ SECTION .text
packuswb m0, m1
%endmacro
%macro WEIGHT_FUNC_DBL_MM 1
cglobal h264_weight_16x%1_mmx2, 5, 5, 0
INIT_MMX
cglobal h264_weight_16_mmx2, 6, 6, 0
WEIGHT_SETUP
mov r2, %1
%if %1 == 16
.nextrow
WEIGHT_OP 0, 4
mova [r0 ], m0
WEIGHT_OP 8, 12
mova [r0+8], m0
add r0, r1
dec r2
dec r2d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_weight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX
WEIGHT_FUNC_DBL_MM 16
WEIGHT_FUNC_DBL_MM 8
%macro WEIGHT_FUNC_MM 4
cglobal h264_weight_%1x%2_%4, 7, 7, %3
%macro WEIGHT_FUNC_MM 3
cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP
mov r2, %2
%if %2 == 16
.nextrow
WEIGHT_OP 0, mmsize/2
mova [r0], m0
add r0, r1
dec r2
dec r2d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_weight_%1x16_%4.nextrow)
%endif
%endmacro
INIT_MMX
WEIGHT_FUNC_MM 8, 16, 0, mmx2
WEIGHT_FUNC_MM 8, 8, 0, mmx2
WEIGHT_FUNC_MM 8, 4, 0, mmx2
WEIGHT_FUNC_MM 8, 0, mmx2
INIT_XMM
WEIGHT_FUNC_MM 16, 16, 8, sse2
WEIGHT_FUNC_MM 16, 8, 8, sse2
WEIGHT_FUNC_MM 16, 8, sse2
%macro WEIGHT_FUNC_HALF_MM 5
cglobal h264_weight_%1x%2_%5, 5, 5, %4
%macro WEIGHT_FUNC_HALF_MM 3
cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP
mov r2, %2/2
sar r2d, 1
lea r3, [r1*2]
%if %2 == mmsize
.nextrow
WEIGHT_OP 0, r1
movh [r0], m0
@ -135,31 +115,34 @@ cglobal h264_weight_%1x%2_%5, 5, 5, %4
movh [r0+r1], m0
%endif
add r0, r3
dec r2
dec r2d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_weight_%1x%3_%5.nextrow)
%endif
%endmacro
INIT_MMX
WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM
WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SETUP 0
add r6, 1
or r6, 1
add r3, 1
movd m3, r4d
movd m4, r5d
movd m5, r6d
movd m6, r3d
%ifdef ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
%endif
mov off_regd, r7m
add off_regd, 1
or off_regd, 1
add r4, 1
movd m3, r5d
movd m4, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6
psrld m5, 1
%if mmsize == 16
@ -195,11 +178,10 @@ WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m1
%endmacro
%macro BIWEIGHT_FUNC_DBL_MM 1
cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
INIT_MMX
cglobal h264_biweight_16_mmx2, 7, 7, 0
BIWEIGHT_SETUP
mov r3, %1
%if %1 == 16
movifnidn r3d, r3m
.nextrow
BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, 4
@ -211,23 +193,14 @@ cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
mova [r0+8], m0
add r0, r2
add r1, r2
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX
BIWEIGHT_FUNC_DBL_MM 16
BIWEIGHT_FUNC_DBL_MM 8
%macro BIWEIGHT_FUNC_MM 4
cglobal h264_biweight_%1x%2_%4, 7, 7, %3
%macro BIWEIGHT_FUNC_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP
mov r3, %2
%if %2 == 16
movifnidn r3d, r3m
.nextrow
BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, mmsize/2
@ -235,28 +208,22 @@ cglobal h264_biweight_%1x%2_%4, 7, 7, %3
mova [r0], m0
add r0, r2
add r1, r2
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_%1x16_%4.nextrow)
%endif
%endmacro
INIT_MMX
BIWEIGHT_FUNC_MM 8, 16, 0, mmx2
BIWEIGHT_FUNC_MM 8, 8, 0, mmx2
BIWEIGHT_FUNC_MM 8, 4, 0, mmx2
BIWEIGHT_FUNC_MM 8, 0, mmx2
INIT_XMM
BIWEIGHT_FUNC_MM 16, 16, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, sse2
%macro BIWEIGHT_FUNC_HALF_MM 5
cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%macro BIWEIGHT_FUNC_HALF_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP
mov r3, %2/2
movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2]
%if %2 == mmsize
.nextrow
BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, r2
@ -270,31 +237,30 @@ cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%endif
add r0, r4
add r1, r4
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow)
%endif
%endmacro
INIT_MMX
BIWEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM
BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SSSE3_SETUP 0
add r6, 1
or r6, 1
add r3, 1
movd m4, r4d
movd m0, r5d
movd m5, r6d
movd m6, r3d
%ifdef ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
%endif
mov off_regd, r7m
add off_regd, 1
or off_regd, 1
add r4, 1
movd m4, r5d
movd m0, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6
psrld m5, 1
punpcklbw m4, m0
@ -314,12 +280,11 @@ BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m2
%endmacro
%macro BIWEIGHT_SSSE3_16 1
cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
INIT_XMM
cglobal h264_biweight_16_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP
mov r3, %1
movifnidn r3d, r3m
%if %1 == 16
.nextrow
movh m0, [r0]
movh m2, [r0+8]
@ -330,25 +295,17 @@ cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
mova [r0], m0
add r0, r2
add r1, r2
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM
BIWEIGHT_SSSE3_16 16
BIWEIGHT_SSSE3_16 8
%macro BIWEIGHT_SSSE3_8 1
cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
cglobal h264_biweight_8_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP
mov r3, %1/2
movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2]
%if %1 == 16
.nextrow
movh m0, [r0]
movh m1, [r1]
@ -361,15 +318,6 @@ cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
movhps [r0+r2], m0
add r0, r4
add r1, r4
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM
BIWEIGHT_SSSE3_8 16
BIWEIGHT_SSSE3_8 8
BIWEIGHT_SSSE3_8 4

@ -36,33 +36,26 @@ cextern pw_1
SECTION .text
;-----------------------------------------------------------------------------
; void h264_weight(uint8_t *dst, int stride, int log2_denom,
; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
; int weight, int offset);
;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32
DECLARE_REG_TMP 2
%else
DECLARE_REG_TMP 10
%endif
%macro WEIGHT_PROLOGUE 1
mov t0, %1
%macro WEIGHT_PROLOGUE 0
.prologue
PROLOGUE 0,5,8
PROLOGUE 0,6,8
movifnidn r0, r0mp
movifnidn r1d, r1m
movifnidn r3d, r3m
movifnidn r4d, r4m
movifnidn r5d, r5m
%endmacro
%macro WEIGHT_SETUP 1
mova m0, [pw_1]
movd m2, r2m
movd m2, r3m
pslld m0, m2 ; 1<<log2_denom
SPLATW m0, m0
shl r4, 19 ; *8, move to upper half of dword
lea r4, [r4+r3*2+0x10000]
movd m3, r4d ; weight<<1 | 1+(offset<<(3))
shl r5, 19 ; *8, move to upper half of dword
lea r5, [r5+r4*2+0x10000]
movd m3, r5d ; weight<<1 | 1+(offset<<(3))
pshufd m3, m3, 0
mova m4, [pw_pixel_max]
paddw m2, [sq_1] ; log2_denom+1
@ -96,8 +89,8 @@ DECLARE_REG_TMP 10
%endmacro
%macro WEIGHT_FUNC_DBL 1
cglobal h264_weight_16x16_10_%1
WEIGHT_PROLOGUE 16
cglobal h264_weight_16_10_%1
WEIGHT_PROLOGUE
WEIGHT_SETUP %1
.nextrow
WEIGHT_OP %1, 0
@ -105,13 +98,9 @@ cglobal h264_weight_16x16_10_%1
WEIGHT_OP %1, 16
mova [r0+16], m5
add r0, r1
dec t0
dec r2d
jnz .nextrow
REP_RET
cglobal h264_weight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_16x16_10_%1.prologue)
%endmacro
INIT_XMM
@ -120,24 +109,16 @@ WEIGHT_FUNC_DBL sse4
%macro WEIGHT_FUNC_MM 1
cglobal h264_weight_8x16_10_%1
WEIGHT_PROLOGUE 16
cglobal h264_weight_8_10_%1
WEIGHT_PROLOGUE
WEIGHT_SETUP %1
.nextrow
WEIGHT_OP %1, 0
mova [r0], m5
add r0, r1
dec t0
dec r2d
jnz .nextrow
REP_RET
cglobal h264_weight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
cglobal h264_weight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
%endmacro
INIT_XMM
@ -146,8 +127,9 @@ WEIGHT_FUNC_MM sse4
%macro WEIGHT_FUNC_HALF_MM 1
cglobal h264_weight_4x8_10_%1
WEIGHT_PROLOGUE 4
cglobal h264_weight_4_10_%1
WEIGHT_PROLOGUE
sar r2d, 1
WEIGHT_SETUP %1
lea r3, [r1*2]
.nextrow
@ -155,17 +137,9 @@ cglobal h264_weight_4x8_10_%1
movh [r0], m5
movhps [r0+r1], m5
add r0, r3
dec t0
dec r2d
jnz .nextrow
REP_RET
cglobal h264_weight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
cglobal h264_weight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
%endmacro
INIT_XMM
@ -174,40 +148,40 @@ WEIGHT_FUNC_HALF_MM sse4
;-----------------------------------------------------------------------------
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
; int weightd, int weights, int offset);
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
; int log2_denom, int weightd, int weights, int offset);
;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32
DECLARE_REG_TMP 2,3
DECLARE_REG_TMP 3
%else
DECLARE_REG_TMP 10,2
DECLARE_REG_TMP 10
%endif
%macro BIWEIGHT_PROLOGUE 1
mov t0, %1
%macro BIWEIGHT_PROLOGUE 0
.prologue
PROLOGUE 0,7,8
movifnidn r0, r0mp
movifnidn r1, r1mp
movifnidn t1d, r2m
movifnidn r4d, r4m
movifnidn r2d, r2m
movifnidn r5d, r5m
movifnidn r6d, r6m
movifnidn t0d, r7m
%endmacro
%macro BIWEIGHT_SETUP 1
lea r6, [r6*4+1] ; (offset<<2)+1
or r6, 1
shl r5, 16
or r4, r5
movd m4, r4d ; weightd | weights
movd m5, r6d ; (offset+1)|1
movd m6, r3m ; log2_denom
lea t0, [t0*4+1] ; (offset<<2)+1
or t0, 1
shl r6, 16
or r5, r6
movd m4, r5d ; weightd | weights
movd m5, t0d ; (offset+1)|1
movd m6, r4m ; log2_denom
pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
paddd m6, [sq_1]
pshufd m4, m4, 0
pshufd m5, m5, 0
mova m3, [pw_pixel_max]
movifnidn r3d, r3m
%ifnidn %1, sse4
pxor m7, m7
%endif
@ -243,23 +217,19 @@ DECLARE_REG_TMP 10,2
%endmacro
%macro BIWEIGHT_FUNC_DBL 1
cglobal h264_biweight_16x16_10_%1
BIWEIGHT_PROLOGUE 16
cglobal h264_biweight_16_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1
.nextrow
BIWEIGHT %1, 0
mova [r0 ], m0
BIWEIGHT %1, 16
mova [r0+16], m0
add r0, t1
add r1, t1
dec t0
add r0, r2
add r1, r2
dec r3d
jnz .nextrow
REP_RET
cglobal h264_biweight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_16x16_10_%1.prologue)
%endmacro
INIT_XMM
@ -267,25 +237,17 @@ BIWEIGHT_FUNC_DBL sse2
BIWEIGHT_FUNC_DBL sse4
%macro BIWEIGHT_FUNC 1
cglobal h264_biweight_8x16_10_%1
BIWEIGHT_PROLOGUE 16
cglobal h264_biweight_8_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1
.nextrow
BIWEIGHT %1, 0
mova [r0], m0
add r0, t1
add r1, t1
dec t0
add r0, r2
add r1, r2
dec r3d
jnz .nextrow
REP_RET
cglobal h264_biweight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
cglobal h264_biweight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
%endmacro
INIT_XMM
@ -293,27 +255,20 @@ BIWEIGHT_FUNC sse2
BIWEIGHT_FUNC sse4
%macro BIWEIGHT_FUNC_HALF 1
cglobal h264_biweight_4x8_10_%1
BIWEIGHT_PROLOGUE 4
cglobal h264_biweight_4_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1
lea r4, [t1*2]
sar r3d, 1
lea r4, [r2*2]
.nextrow
BIWEIGHT %1, 0, t1
BIWEIGHT %1, 0, r2
movh [r0 ], m0
movhps [r0+t1], m0
movhps [r0+r2], m0
add r0, r4
add r1, r4
dec t0
dec r3d
jnz .nextrow
REP_RET
cglobal h264_biweight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
cglobal h264_biweight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
%endmacro
INIT_XMM

@ -298,63 +298,53 @@ LF_IFUNC(v, luma_intra, 10, mmxext)
/***********************************/
/* weighted prediction */
#define H264_WEIGHT(W, H, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
int stride, int log2_denom, int weight, int offset);
#define H264_WEIGHT(W, OPT) \
void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, \
int stride, int height, int log2_denom, int weight, int offset);
#define H264_BIWEIGHT(W, H, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
uint8_t *src, int stride, int log2_denom, int weightd, \
#define H264_BIWEIGHT(W, OPT) \
void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, \
uint8_t *src, int stride, int height, int log2_denom, int weightd, \
int weights, int offset);
#define H264_BIWEIGHT_MMX(W,H) \
H264_WEIGHT (W, H, mmx2) \
H264_BIWEIGHT(W, H, mmx2)
#define H264_BIWEIGHT_MMX_SSE(W,H) \
H264_BIWEIGHT_MMX(W, H) \
H264_WEIGHT (W, H, sse2) \
H264_BIWEIGHT (W, H, sse2) \
H264_BIWEIGHT (W, H, ssse3)
H264_BIWEIGHT_MMX_SSE(16, 16)
H264_BIWEIGHT_MMX_SSE(16, 8)
H264_BIWEIGHT_MMX_SSE( 8, 16)
H264_BIWEIGHT_MMX_SSE( 8, 8)
H264_BIWEIGHT_MMX_SSE( 8, 4)
H264_BIWEIGHT_MMX ( 4, 8)
H264_BIWEIGHT_MMX ( 4, 4)
H264_BIWEIGHT_MMX ( 4, 2)
#define H264_WEIGHT_10(W, H, DEPTH, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
int stride, int log2_denom, int weight, int offset);
#define H264_BIWEIGHT_10(W, H, DEPTH, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, int stride, int log2_denom, \
#define H264_BIWEIGHT_MMX(W) \
H264_WEIGHT (W, mmx2) \
H264_BIWEIGHT(W, mmx2)
#define H264_BIWEIGHT_MMX_SSE(W) \
H264_BIWEIGHT_MMX(W) \
H264_WEIGHT (W, sse2) \
H264_BIWEIGHT (W, sse2) \
H264_BIWEIGHT (W, ssse3)
H264_BIWEIGHT_MMX_SSE(16)
H264_BIWEIGHT_MMX_SSE( 8)
H264_BIWEIGHT_MMX ( 4)
/* Declare the prototype of one high-bit-depth weighted-prediction routine.
 * The function name is assembled by token pasting as
 * ff_h264_weight_<W>_<DEPTH>_<OPT>; the row count is passed at run time via
 * the `height` argument.
 * NOTE(review): W is presumably the block width in pixels, DEPTH the sample
 * bit depth and OPT the instruction-set suffix -- confirm against the asm. */
#define H264_WEIGHT_10(W, DEPTH, OPT) \
void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
int stride, int height, int log2_denom, int weight, int offset);
/* Declare the prototype of one high-bit-depth bi-directional weighted-
 * prediction routine, named ff_h264_biweight_<W>_<DEPTH>_<OPT> by token
 * pasting. Takes two buffers (dst and src), one shared stride, a run-time
 * `height`, and a separate weight for each buffer (weightd, weights) plus a
 * common offset.
 * NOTE(review): W/DEPTH/OPT presumably mean block width, bit depth and
 * instruction-set suffix -- confirm against the asm. */
#define H264_BIWEIGHT_10(W, DEPTH, OPT) \
void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, \
int weightd, int weights, int offset);
#define H264_BIWEIGHT_10_SSE(W, H, DEPTH) \
H264_WEIGHT_10 (W, H, DEPTH, sse2) \
H264_WEIGHT_10 (W, H, DEPTH, sse4) \
H264_BIWEIGHT_10(W, H, DEPTH, sse2) \
H264_BIWEIGHT_10(W, H, DEPTH, sse4)
H264_BIWEIGHT_10_SSE(16, 16, 10)
H264_BIWEIGHT_10_SSE(16, 8, 10)
H264_BIWEIGHT_10_SSE( 8, 16, 10)
H264_BIWEIGHT_10_SSE( 8, 8, 10)
H264_BIWEIGHT_10_SSE( 8, 4, 10)
H264_BIWEIGHT_10_SSE( 4, 8, 10)
H264_BIWEIGHT_10_SSE( 4, 4, 10)
H264_BIWEIGHT_10_SSE( 4, 2, 10)
/* For one (W, DEPTH) pair, declare all four prototypes at once: the weight
 * and biweight routines, each in an sse2 and an sse4 flavour. */
#define H264_BIWEIGHT_10_SSE(W, DEPTH) \
H264_WEIGHT_10 (W, DEPTH, sse2) \
H264_WEIGHT_10 (W, DEPTH, sse4) \
H264_BIWEIGHT_10(W, DEPTH, sse2) \
H264_BIWEIGHT_10(W, DEPTH, sse4)
H264_BIWEIGHT_10_SSE(16, 10)
H264_BIWEIGHT_10_SSE( 8, 10)
H264_BIWEIGHT_10_SSE( 4, 10)
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{
int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX2) {
if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) {
c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
}
@ -394,23 +384,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif
c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_4_mmx2;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_mmx2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_mmx2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_4_mmx2;
if (mm_flags&AV_CPU_FLAG_SSE2) {
c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
@ -422,17 +402,11 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2;
c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2;
c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2;
c->weight_h264_pixels_tab[0]= ff_h264_weight_16_sse2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_8_sse2;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_sse2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_sse2;
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
@ -442,11 +416,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
#endif
}
if (mm_flags&AV_CPU_FLAG_SSSE3) {
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_ssse3;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_ssse3;
}
if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK
@ -485,23 +456,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif
c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse2;
c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse2;
c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse2;
c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse2;
c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse2;
c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse2;
c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse2;
c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse2;
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse2;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse2;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse2;
c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse2;
c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse2;
c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse2;
c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse2;
c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse2;
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
@ -513,23 +474,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
#endif
}
if (mm_flags&AV_CPU_FLAG_SSE4) {
c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse4;
c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse4;
c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse4;
c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse4;
c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse4;
c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse4;
c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse4;
c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse4;
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse4;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse4;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse4;
c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse4;
c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse4;
c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse4;
c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse4;
c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse4;
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
}
#if HAVE_AVX
if (mm_flags&AV_CPU_FLAG_AVX) {

@ -10,7 +10,7 @@ OBJS = alldevices.o avdevice.o
# input/output devices
OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \
alsa-audio-dec.o
alsa-audio-dec.o timefilter.o
OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \
alsa-audio-enc.o
OBJS-$(CONFIG_BKTR_INDEV) += bktr.o
@ -19,7 +19,7 @@ OBJS-$(CONFIG_DSHOW_INDEV) += dshow.o dshow_enummediatypes.o \
dshow_pin.o dshow_common.o
OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o
OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o
OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o
OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o timefilter.o
OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o
OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o
OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o
@ -39,4 +39,6 @@ OBJS-$(CONFIG_LIBDC1394_INDEV) += libdc1394.o
SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h
SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h
TESTPROGS = timefilter
include $(SRC_PATH)/subdir.mak

@ -33,7 +33,7 @@
#include <alsa/asoundlib.h>
#include "config.h"
#include "libavutil/log.h"
#include "libavformat/timefilter.h"
#include "timefilter.h"
#include "avdevice.h"
/* XXX: we make the assumption that the soundcard accepts this format */

@ -28,7 +28,8 @@
#include "libavutil/fifo.h"
#include "libavutil/opt.h"
#include "libavcodec/avcodec.h"
#include "libavformat/timefilter.h"
#include "libavformat/avformat.h"
#include "timefilter.h"
#include "avdevice.h"
/**

@ -24,8 +24,8 @@
#include "config.h"
#include "avformat.h"
#include "timefilter.h"
#include "libavutil/mem.h"
struct TimeFilter {
/// Delay Locked Loop data. These variables refer to mathematical

@ -22,8 +22,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFORMAT_TIMEFILTER_H
#define AVFORMAT_TIMEFILTER_H
#ifndef AVDEVICE_TIMEFILTER_H
#define AVDEVICE_TIMEFILTER_H
/**
* Opaque type representing a time filter state
@ -94,4 +94,4 @@ void ff_timefilter_reset(TimeFilter *);
*/
void ff_timefilter_destroy(TimeFilter *);
#endif /* AVFORMAT_TIMEFILTER_H */
#endif /* AVDEVICE_TIMEFILTER_H */

@ -354,11 +354,8 @@ OBJS-$(CONFIG_RTP_PROTOCOL) += rtpproto.o
OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o
OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o
# libavdevice dependencies
OBJS-$(CONFIG_ALSA_INDEV) += timefilter.o
OBJS-$(CONFIG_JACK_INDEV) += timefilter.o
TESTPROGS = seek timefilter
TESTPROGS = seek
TOOLS = pktdumper probetest
include $(SRC_PATH)/subdir.mak

@ -228,8 +228,9 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst
case AMF_DATA_TYPE_OBJECT: {
unsigned int keylen;
if (vstream && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1)
if (parse_keyframes_index(s, ioc, vstream, max_pos) < 0)
if ((vstream || astream) && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1)
if (parse_keyframes_index(s, ioc, vstream ? vstream : astream,
max_pos) < 0)
av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n");
while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) {

@ -60,10 +60,13 @@ typedef struct FLVContext {
int64_t duration_offset;
int64_t filesize_offset;
int64_t duration;
int delay; ///< first dts delay for AVC
int64_t last_ts;
} FLVContext;
/**
 * Per-stream muxer state. flv_write_header() allocates one zeroed instance
 * for every stream and stores it in that stream's priv_data, so timestamp
 * bookkeeping is kept separately for each stream.
 */
typedef struct FLVStreamContext {
int delay; ///< first dts delay for each stream (needed for AVC & Speex); set to -dts on a negative dts while still 0, then added to every dts
int64_t last_ts; ///< last (highest) timestamp written for each stream; initialised to -1 in flv_write_header()
} FLVStreamContext;
static int get_audio_flags(AVCodecContext *enc){
int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT;
@ -182,6 +185,7 @@ static int flv_write_header(AVFormatContext *s)
for(i=0; i<s->nb_streams; i++){
AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO) {
if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) {
framerate = av_q2d(s->streams[i]->r_frame_rate);
@ -199,6 +203,12 @@ static int flv_write_header(AVFormatContext *s)
return -1;
}
av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */
sc = av_mallocz(sizeof(FLVStreamContext));
if (!sc)
return AVERROR(ENOMEM);
s->streams[i]->priv_data = sc;
sc->last_ts = -1;
}
avio_write(pb, "FLV", 3);
avio_w8(pb,1);
@ -218,8 +228,6 @@ static int flv_write_header(AVFormatContext *s)
}
}
flv->last_ts = -1;
/* write meta_tag */
avio_w8(pb, 18); // tag type META
metadata_size_pos= avio_tell(pb);
@ -361,9 +369,10 @@ static int flv_write_trailer(AVFormatContext *s)
/* Add EOS tag */
for (i = 0; i < s->nb_streams; i++) {
AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc = s->streams[i]->priv_data;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO &&
(enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) {
put_avc_eos_tag(pb, flv->last_ts);
put_avc_eos_tag(pb, sc->last_ts);
}
}
@ -384,6 +393,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
AVIOContext *pb = s->pb;
AVCodecContext *enc = s->streams[pkt->stream_index]->codec;
FLVContext *flv = s->priv_data;
FLVStreamContext *sc = s->streams[pkt->stream_index]->priv_data;
unsigned ts;
int size= pkt->size;
uint8_t *data= NULL;
@ -434,20 +444,20 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n");
return -1;
}
if (!flv->delay && pkt->dts < 0)
flv->delay = -pkt->dts;
if (!sc->delay && pkt->dts < 0)
sc->delay = -pkt->dts;
ts = pkt->dts + flv->delay; // add delay to force positive dts
ts = pkt->dts + sc->delay; // add delay to force positive dts
/* check Speex packet duration */
if (enc->codec_id == CODEC_ID_SPEEX && ts - flv->last_ts > 160) {
if (enc->codec_id == CODEC_ID_SPEEX && ts - sc->last_ts > 160) {
av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than "
"8 frames per packet. Adobe Flash "
"Player cannot handle this!\n");
}
if (flv->last_ts < ts)
flv->last_ts = ts;
if (sc->last_ts < ts)
sc->last_ts = ts;
avio_wb24(pb,size + flags_size);
avio_wb24(pb,ts);
@ -471,7 +481,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
avio_write(pb, data ? data : pkt->data, size);
avio_wb32(pb,size+flags_size+11); // previous tag size
flv->duration = FFMAX(flv->duration, pkt->pts + flv->delay + pkt->duration);
flv->duration = FFMAX(flv->duration, pkt->pts + sc->delay + pkt->duration);
avio_flush(pb);

@ -35,6 +35,7 @@
#include "riff.h"
#include "isom.h"
#include "libavcodec/get_bits.h"
#include "id3v1.h"
#if CONFIG_ZLIB
#include <zlib.h>
@ -99,7 +100,7 @@ static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
return 0;
}
static int mov_metadata_int8(MOVContext *c, AVIOContext *pb,
static int mov_metadata_int8_bypass_padding(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key)
{
char buf[16];
@ -115,7 +116,7 @@ static int mov_metadata_int8(MOVContext *c, AVIOContext *pb,
return 0;
}
static int mov_metadata_stik(MOVContext *c, AVIOContext *pb,
static int mov_metadata_int8_no_padding(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key)
{
char buf[16];
@ -126,6 +127,23 @@ static int mov_metadata_stik(MOVContext *c, AVIOContext *pb,
return 0;
}
/**
 * Parse a 'gnre' metadata atom and store the matching ID3v1 genre name.
 *
 * Two bytes are consumed from pb: the first is skipped (meaning unknown),
 * the second is a 1-based index into ff_id3v1_genre_str[].
 *
 * @param c   demuxer context; the genre is added to c->fc->metadata
 * @param pb  I/O context positioned at the atom payload
 * @param len payload length (unused)
 * @param key metadata key under which the genre name is stored
 * @return always 0; an out-of-range genre index is silently ignored
 */
static int mov_metadata_gnre(MOVContext *c, AVIOContext *pb,
                             unsigned len, const char *key)
{
    int genre;

    avio_r8(pb); // unknown
    genre = avio_r8(pb);

    /* The stored index is 1-based; 0 and anything past the table are
     * rejected before they can be used as an array index. */
    if (genre < 1 || genre > ID3v1_GENRE_MAX)
        return 0;

    /* Pass the table entry directly: with flags == 0 av_dict_set() makes its
     * own copy of the value, so no intermediate buffer is needed. A fixed-size
     * scratch buffer here would truncate any genre name that does not fit. */
    av_dict_set(&c->fc->metadata, key, ff_id3v1_genre_str[genre - 1], 0);

    return 0;
}
static const uint32_t mac_to_unicode[128] = {
0x00C4,0x00C5,0x00C7,0x00C9,0x00D1,0x00D6,0x00DC,0x00E1,
0x00E0,0x00E2,0x00E4,0x00E3,0x00E5,0x00E7,0x00E9,0x00E8,
@ -189,6 +207,8 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG(0xa9,'a','l','b'): key = "album"; break;
case MKTAG(0xa9,'d','a','y'): key = "date"; break;
case MKTAG(0xa9,'g','e','n'): key = "genre"; break;
case MKTAG( 'g','n','r','e'): key = "genre";
parse = mov_metadata_gnre; break;
case MKTAG(0xa9,'t','o','o'):
case MKTAG(0xa9,'s','w','r'): key = "encoder"; break;
case MKTAG(0xa9,'e','n','c'): key = "encoder"; break;
@ -202,11 +222,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG( 'd','i','s','k'): key = "disc";
parse = mov_metadata_track_or_disc_number; break;
case MKTAG( 't','v','e','s'): key = "episode_sort";
parse = mov_metadata_int8; break;
parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 't','v','s','n'): key = "season_number";
parse = mov_metadata_int8; break;
parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 's','t','i','k'): key = "media_type";
parse = mov_metadata_stik; break;
parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'h','d','v','d'): key = "hd_video";
parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'p','g','a','p'): key = "gapless_playback";
parse = mov_metadata_int8_no_padding; break;
}
if (c->itunes_metadata && atom.size > 8) {

@ -859,6 +859,29 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
},
.flags = PIX_FMT_BE,
},
[PIX_FMT_YUV422P9LE] = {
.name = "yuv422p9le",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
},
[PIX_FMT_YUV422P9BE] = {
.name = "yuv422p9be",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
.flags = PIX_FMT_BE,
},
[PIX_FMT_YUV422P10LE] = {
.name = "yuv422p10le",
.nb_components= 3,

@ -149,12 +149,15 @@ enum PixelFormat {
PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R.
PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};
@ -182,6 +185,7 @@ enum PixelFormat {
#define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
#define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE)
#define PIX_FMT_YUV422P9 PIX_FMT_NE(YUV422P9BE , YUV422P9LE)
#define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE)
#define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
#define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE)

@ -536,6 +536,18 @@
%endif
%endmacro
; SPLATD_MMX reg
; Interleaves the low dword of %1 with itself (punpckldq), so on a 64-bit
; MMX register both dwords end up holding the original low dword.
; NOTE(review): on a 128-bit register this would yield d0,d0,d1,d1 rather
; than a full broadcast -- presumably only used with mm registers.
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
; SPLATD_SSE reg
; Broadcasts dword 0 of %1 to all four dword lanes using the SSE1
; floating-point shuffle (selector 0 picks element 0 for every lane).
%macro SPLATD_SSE 1
shufps %1, %1, 0
%endmacro
; SPLATD_SSE2 reg
; Broadcasts dword 0 of %1 to all four dword lanes using the SSE2 integer
; shuffle (selector 0 picks element 0 for every lane).
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2
pminsw %1, %3

@ -2843,6 +2843,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
#if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
@ -2852,6 +2853,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
#else
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
@ -2912,6 +2914,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
switch (srcFormat) {
#if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
@ -2922,6 +2925,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
#else
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:

@ -547,6 +547,8 @@ const char *sws_format_name(enum PixelFormat format);
#define isNBPS(x) ( \
(x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV422P10BE \
@ -574,6 +576,7 @@ const char *sws_format_name(enum PixelFormat format);
#define isPlanarYUV(x) ( \
isPlanar8YUV(x) \
|| (x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV420P10LE \
|| (x)==PIX_FMT_YUV422P10LE \
@ -583,6 +586,7 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \
|| (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV420P10BE \
|| (x)==PIX_FMT_YUV422P10BE \

@ -136,6 +136,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_YUV420P9LE] = { 1 , 1 },
[PIX_FMT_YUV420P10BE] = { 1 , 1 },
[PIX_FMT_YUV420P10LE] = { 1 , 1 },
[PIX_FMT_YUV422P9BE] = { 1 , 1 },
[PIX_FMT_YUV422P9LE] = { 1 , 1 },
[PIX_FMT_YUV422P10BE] = { 1 , 1 },
[PIX_FMT_YUV422P10LE] = { 1 , 1 },
[PIX_FMT_YUV444P9BE] = { 1 , 1 },
@ -280,15 +282,18 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
if (flags & SWS_BICUBIC) {
int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24);
int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
int64_t dd = ( d*d)>>30;
int64_t ddd= (dd*d)>>30;
if (d >= 1LL<<31) {
coeff = 0.0;
} else {
int64_t dd = (d * d) >> 30;
int64_t ddd = (dd * d) >> 30;
if (d < 1LL<<30)
coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
else if (d < 1LL<<31)
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
else
coeff=0.0;
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
}
coeff *= fone>>(30+24);
}
/* else if (flags & SWS_X) {

@ -790,8 +790,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
y_table32 = c->yuvTable;
yb = -(384<<16) - oy;
for (i = 0; i < 1024; i++) {
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase));
unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255u << abase));
y_table32[i+1024] = yval << gbase;
y_table32[i+2048] = yval << bbase;
yb += cy;

@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6

@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6

@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6

@ -42,6 +42,8 @@ yuv422p10be cea7ca6b0e66d6f29539885896c88603
yuv422p10le a10c4a5837547716f13cd61918b145f9
yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c
yuv422p16le 61bfcee8e54465f760164f5a75d40b5e
yuv422p9be 82494823944912f73cebc58ad2979bbd
yuv422p9le fc69c8a21f473916a4b4225636b97e06
yuv440p 461503fdb9b90451020aa3b25ddf041c
yuv444p 81b2eba962d12e8d64f003ac56f6faf2
yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9

@ -42,6 +42,8 @@ yuv422p10be 588fe319b96513c32e21d3e32b45447f
yuv422p10le 11b57f2bd9661024153f3973b9090cdb
yuv422p16be c092d083548c2a144c372a98c46875c7
yuv422p16le c071b9397a416d51cbe339345cbcba84
yuv422p9be 7c6f1e140b3999ee7d923854e507752a
yuv422p9le 51f10d79c07989060dd06e767e6d7d60
yuv440p 876385e96165acf51271b20e5d85a416
yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7
yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd

Loading…
Cancel
Save