Merge remote-tracking branch 'qatar/master'

* qatar/master:
  id3v2: fix doxy comment - 'machine byte order' makes no sense on char arrays
  VC1: restore mistakenly removed code
  twinvq: check output buffer size before decoding
  twinvq: return an error when the packet size is too small
  lavf: export some forgotten symbols with non-av prefixes.
  swscale: update altivec yuv2planeX asm to new per-plane API.
  swscale: make yuv2yuvX_10_sse2/avx 8/9/16-bits aware.
  yuv2planeX10 SIMD
  swscale: decide whether to use yuv2plane1/X on a per-plane basis.
  swscale: reintroduce full precision in 16-bit output.
  Split up yuv2yuvX functions
  Split out yuv2yuv1 luma and chroma in order to make them generic DSP functions
  lavc: replace references to deprecated AVCodecContext.error_recognition to use AVCodecContext.err_recognition
  lavc: translate non-flag-based er options into flag-based ef options at codec open
  add -err_filter AVOptions to access flag-based error recognition
  h264_weight: initialize "height" function argument properly.
  presets: spelling error in libvpx 1080p50_60
  avplay: fix fullscreen behaviour with SDL 1.2.14 on Mac OS X

Conflicts:
	ffplay.c
	libavformat/libavformat.v
	libswscale/swscale.c
	libswscale/x86/swscale_template.c
	tests/ref/lavfi/pixfmts_scale

Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/2/head
Michael Niedermayer 13 years ago
commit f97faf6751
  1. 2
      ffplay.c
  2. 2
      libavcodec/aacdec.c
  3. 2
      libavcodec/ac3dec.c
  4. 6
      libavcodec/alsdec.c
  5. 2
      libavcodec/h261dec.c
  6. 2
      libavcodec/h263dec.c
  7. 4
      libavcodec/h264.c
  8. 2
      libavcodec/h264_refs.c
  9. 4
      libavcodec/mjpegbdec.c
  10. 2
      libavcodec/mjpegdec.c
  11. 26
      libavcodec/mpeg12.c
  12. 10
      libavcodec/mpegaudiodec.c
  13. 4
      libavcodec/mxpegdec.c
  14. 9
      libavcodec/options.c
  15. 14
      libavcodec/twinvq.c
  16. 10
      libavcodec/utils.c
  17. 4
      libavcodec/vc1dec.c
  18. 1
      libavcodec/x86/h264_weight_10bit.asm
  19. 2
      libavformat/id3v2.h
  20. 5
      libavformat/libavformat.v
  21. 94
      libswscale/ppc/swscale_altivec.c
  22. 382
      libswscale/swscale.c
  23. 71
      libswscale/swscale_internal.h
  24. 238
      libswscale/x86/scale.asm
  25. 32
      libswscale/x86/swscale_mmx.c
  26. 206
      libswscale/x86/swscale_template.c
  27. 8
      tests/ref/lavfi/pixdesc
  28. 8
      tests/ref/lavfi/pixfmts_copy
  29. 4
      tests/ref/lavfi/pixfmts_crop
  30. 8
      tests/ref/lavfi/pixfmts_hflip
  31. 8
      tests/ref/lavfi/pixfmts_null
  32. 12
      tests/ref/lavfi/pixfmts_scale
  33. 8
      tests/ref/lavfi/pixfmts_vflip

@ -1349,7 +1349,7 @@ static int queue_picture(VideoState *is, AVFrame *src_frame, double pts1, int64_
#endif
SDL_Event event;
vp->allocated = 0;
vp->allocated = 0;
vp->reallocate = 0;
/* the allocation must be done in the main thread to avoid

@ -596,7 +596,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
int ret = set_default_channel_config(avctx, new_che_pos, ac->m4ac.chan_config);
if (!ret)
output_configure(ac, ac->che_pos, new_che_pos, ac->m4ac.chan_config, OC_GLOBAL_HDR);
else if (avctx->error_recognition >= FF_ER_EXPLODE)
else if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
}
}

@ -1359,7 +1359,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
if (s->frame_size > buf_size) {
av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
} else if (avctx->error_recognition >= FF_ER_CAREFUL) {
} else if (avctx->err_recognition & AV_EF_CRCCHECK) {
/* check for crc mismatch */
if (av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, &buf[2], s->frame_size-2)) {
av_log(avctx, AV_LOG_ERROR, "frame CRC mismatch\n");

@ -393,7 +393,7 @@ static av_cold int read_specific_config(ALSDecContext *ctx)
if (get_bits_left(&gb) < 32)
return -1;
if (avctx->error_recognition >= FF_ER_CAREFUL) {
if (avctx->err_recognition & AV_EF_CRCCHECK) {
ctx->crc_table = av_crc_get_table(AV_CRC_32_IEEE_LE);
ctx->crc = 0xFFFFFFFF;
ctx->crc_org = ~get_bits_long(&gb, 32);
@ -1476,7 +1476,7 @@ static int decode_frame(AVCodecContext *avctx,
}
// update CRC
if (sconf->crc_enabled && avctx->error_recognition >= FF_ER_CAREFUL) {
if (sconf->crc_enabled && (avctx->err_recognition & AV_EF_CRCCHECK)) {
int swap = HAVE_BIGENDIAN != sconf->msb_first;
if (ctx->avctx->bits_per_raw_sample == 24) {
@ -1710,7 +1710,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
// allocate crc buffer
if (HAVE_BIGENDIAN != sconf->msb_first && sconf->crc_enabled &&
avctx->error_recognition >= FF_ER_CAREFUL) {
(avctx->err_recognition & AV_EF_CRCCHECK)) {
ctx->crc_buffer = av_malloc(sizeof(*ctx->crc_buffer) *
ctx->cur_frame_length *
avctx->channels *

@ -136,7 +136,7 @@ static int h261_decode_gob_header(H261Context *h){
if(s->qscale==0) {
av_log(s->avctx, AV_LOG_ERROR, "qscale has forbidden 0 value\n");
if (s->avctx->error_recognition >= FF_ER_COMPLIANT)
if (s->avctx->err_recognition & AV_EF_BITSTREAM)
return -1;
}

@ -732,7 +732,7 @@ intrax8_decoded:
av_log(avctx, AV_LOG_DEBUG, "%"PRId64"\n", rdtsc()-time);
#endif
return (ret && avctx->error_recognition >= FF_ER_EXPLODE)?ret:get_consumed_bytes(s, buf_size);
return (ret && (avctx->err_recognition & AV_EF_EXPLODE))?ret:get_consumed_bytes(s, buf_size);
}
AVCodec ff_h263_decoder = {

@ -2893,7 +2893,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
ff_generate_sliding_window_mmcos(h);
if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
s->avctx->error_recognition >= FF_ER_EXPLODE)
(s->avctx->err_recognition & AV_EF_EXPLODE))
return AVERROR_INVALIDDATA;
/* Error concealment: if a ref is missing, copy the previous ref in its place.
* FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
@ -3072,7 +3072,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
}
if(h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
s->avctx->error_recognition >= FF_ER_EXPLODE)
(s->avctx->err_recognition & AV_EF_EXPLODE))
return AVERROR_INVALIDDATA;
if(FRAME_MBAFF){

@ -654,7 +654,7 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
print_short_term(h);
print_long_term(h);
return h->s.avctx->error_recognition >= FF_ER_EXPLODE ? err : 0;
return (h->s.avctx->err_recognition & AV_EF_EXPLODE) ? err : 0;
}
int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb){

@ -82,7 +82,7 @@ read_header:
init_get_bits(&s->gb, buf_ptr+dqt_offs, (buf_end - (buf_ptr+dqt_offs))*8);
s->start_code = DQT;
if (ff_mjpeg_decode_dqt(s) < 0 &&
avctx->error_recognition >= FF_ER_EXPLODE)
(avctx->err_recognition & AV_EF_EXPLODE))
return AVERROR_INVALIDDATA;
}
@ -116,7 +116,7 @@ read_header:
s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
s->start_code = SOS;
if (ff_mjpeg_decode_sos(s, NULL, NULL) < 0 &&
avctx->error_recognition >= FF_ER_EXPLODE)
(avctx->err_recognition & AV_EF_EXPLODE))
return AVERROR_INVALIDDATA;
}

@ -1522,7 +1522,7 @@ eoi_parser:
break;
}
if (ff_mjpeg_decode_sos(s, NULL, NULL) < 0 &&
avctx->error_recognition >= FF_ER_EXPLODE)
(avctx->err_recognition & AV_EF_EXPLODE))
return AVERROR_INVALIDDATA;
break;
case DRI:

@ -1378,7 +1378,7 @@ static int mpeg1_decode_picture(AVCodecContext *avctx,
if (s->pict_type == AV_PICTURE_TYPE_P || s->pict_type == AV_PICTURE_TYPE_B) {
s->full_pel[0] = get_bits1(&s->gb);
f_code = get_bits(&s->gb, 3);
if (f_code == 0 && avctx->error_recognition >= FF_ER_COMPLIANT)
if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
return -1;
s->mpeg_f_code[0][0] = f_code;
s->mpeg_f_code[0][1] = f_code;
@ -1386,7 +1386,7 @@ static int mpeg1_decode_picture(AVCodecContext *avctx,
if (s->pict_type == AV_PICTURE_TYPE_B) {
s->full_pel[1] = get_bits1(&s->gb);
f_code = get_bits(&s->gb, 3);
if (f_code == 0 && avctx->error_recognition >= FF_ER_COMPLIANT)
if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
return -1;
s->mpeg_f_code[1][0] = f_code;
s->mpeg_f_code[1][1] = f_code;
@ -1819,7 +1819,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
&& s->progressive_frame == 0 /* vbv_delay == 0xBBB || 0xE10*/;
if (left < 0 || (left && show_bits(&s->gb, FFMIN(left, 23)) && !is_d10)
|| (avctx->error_recognition >= FF_ER_AGGRESSIVE && left > 8)) {
|| ((avctx->err_recognition & AV_EF_BUFFER) && left > 8)) {
av_log(avctx, AV_LOG_ERROR, "end mismatch left=%d %0X\n", left, show_bits(&s->gb, FFMIN(left, 23)));
return -1;
} else
@ -1911,7 +1911,7 @@ static int slice_decode_thread(AVCodecContext *c, void *arg)
//av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n",
//ret, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, s->start_mb_y, s->end_mb_y, s->error_count);
if (ret < 0) {
if (c->error_recognition >= FF_ER_EXPLODE)
if (c->err_recognition & AV_EF_EXPLODE)
return ret;
if (s->resync_mb_x >= 0 && s->resync_mb_y >= 0)
ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, AC_ERROR | DC_ERROR | MV_ERROR);
@ -1999,7 +1999,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
s->aspect_ratio_info = get_bits(&s->gb, 4);
if (s->aspect_ratio_info == 0) {
av_log(avctx, AV_LOG_ERROR, "aspect ratio has forbidden 0 value\n");
if (avctx->error_recognition >= FF_ER_COMPLIANT)
if (avctx->err_recognition & AV_EF_BITSTREAM)
return -1;
}
s->frame_rate_index = get_bits(&s->gb, 4);
@ -2287,7 +2287,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
if (avctx->extradata && !avctx->frame_number) {
int ret = decode_chunks(avctx, picture, data_size, avctx->extradata, avctx->extradata_size);
if (ret < 0 && avctx->error_recognition >= FF_ER_EXPLODE)
if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
return ret;
}
@ -2347,7 +2347,7 @@ static int decode_chunks(AVCodecContext *avctx,
s->sync=1;
} else {
av_log(avctx, AV_LOG_ERROR, "ignoring SEQ_START_CODE after %X\n", last_code);
if (avctx->error_recognition >= FF_ER_EXPLODE)
if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
}
break;
@ -2381,7 +2381,7 @@ static int decode_chunks(AVCodecContext *avctx,
last_code = PICTURE_START_CODE;
} else {
av_log(avctx, AV_LOG_ERROR, "ignoring pic after %X\n", last_code);
if (avctx->error_recognition >= FF_ER_EXPLODE)
if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
}
break;
@ -2394,7 +2394,7 @@ static int decode_chunks(AVCodecContext *avctx,
mpeg_decode_sequence_extension(s);
} else {
av_log(avctx, AV_LOG_ERROR, "ignoring seq ext after %X\n", last_code);
if (avctx->error_recognition >= FF_ER_EXPLODE)
if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
}
break;
@ -2412,7 +2412,7 @@ static int decode_chunks(AVCodecContext *avctx,
mpeg_decode_picture_coding_extension(s);
} else {
av_log(avctx, AV_LOG_ERROR, "ignoring pic cod ext after %X\n", last_code);
if (avctx->error_recognition >= FF_ER_EXPLODE)
if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
}
break;
@ -2428,7 +2428,7 @@ static int decode_chunks(AVCodecContext *avctx,
s->sync=1;
} else {
av_log(avctx, AV_LOG_ERROR, "ignoring GOP_START_CODE after %X\n", last_code);
if (avctx->error_recognition >= FF_ER_EXPLODE)
if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
}
break;
@ -2475,7 +2475,7 @@ static int decode_chunks(AVCodecContext *avctx,
if (!s2->pict_type) {
av_log(avctx, AV_LOG_ERROR, "Missing picture start code\n");
if (avctx->error_recognition >= FF_ER_EXPLODE)
if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
break;
}
@ -2516,7 +2516,7 @@ static int decode_chunks(AVCodecContext *avctx,
emms_c();
if (ret < 0) {
if (avctx->error_recognition >= FF_ER_EXPLODE)
if (avctx->err_recognition & AV_EF_EXPLODE)
return ret;
if (s2->resync_mb_x >= 0 && s2->resync_mb_y >= 0)
ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR | DC_ERROR | MV_ERROR);

@ -79,7 +79,7 @@ typedef struct MPADecodeContext {
#endif
int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
int dither_state;
int error_recognition;
int err_recognition;
AVCodecContext* avctx;
MPADSPContext mpadsp;
} MPADecodeContext;
@ -280,7 +280,7 @@ static av_cold int decode_init(AVCodecContext * avctx)
ff_mpadsp_init(&s->mpadsp);
avctx->sample_fmt= OUT_FMT;
s->error_recognition= avctx->error_recognition;
s->err_recognition = avctx->err_recognition;
if (!init && !avctx->parse_only) {
int offset;
@ -1104,7 +1104,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
s_index -= 4;
skip_bits_long(&s->gb, last_pos - pos);
av_log(s->avctx, AV_LOG_INFO, "overread, skip %d enddists: %d %d\n", last_pos - pos, end_pos-pos, end_pos2-pos);
if(s->error_recognition >= FF_ER_COMPLIANT)
if(s->err_recognition & AV_EF_BITSTREAM)
s_index=0;
break;
}
@ -1134,10 +1134,10 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
/* skip extension bits */
bits_left = end_pos2 - get_bits_count(&s->gb);
//av_log(NULL, AV_LOG_ERROR, "left:%d buf:%p\n", bits_left, s->in_gb.buffer);
if (bits_left < 0 && s->error_recognition >= FF_ER_COMPLIANT) {
if (bits_left < 0 && (s->err_recognition & AV_EF_BITSTREAM)) {
av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
s_index=0;
}else if(bits_left > 0 && s->error_recognition >= FF_ER_AGGRESSIVE){
}else if(bits_left > 0 && (s->err_recognition & AV_EF_BUFFER)){
av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
s_index=0;
}

@ -275,11 +275,11 @@ static int mxpeg_decode_frame(AVCodecContext *avctx,
}
ret = ff_mjpeg_decode_sos(jpg, s->mxm_bitmask, reference_ptr);
if (ret < 0 && avctx->error_recognition >= FF_ER_EXPLODE)
if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
return ret;
} else {
ret = ff_mjpeg_decode_sos(jpg, NULL, NULL);
if (ret < 0 && avctx->error_recognition >= FF_ER_EXPLODE)
if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
return ret;
}

@ -201,14 +201,19 @@ static const AVOption options[]={
{"unofficial", "allow unofficial extensions", 0, AV_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_UNOFFICIAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
{"experimental", "allow non standardized experimental things", 0, AV_OPT_TYPE_CONST, {.dbl = FF_COMPLIANCE_EXPERIMENTAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
{"b_qoffset", "qp offset between P and B frames", OFFSET(b_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
#if FF_API_ER
{"er", "set error detection aggressivity", OFFSET(error_recognition), AV_OPT_TYPE_INT, {.dbl = FF_ER_CAREFUL }, INT_MIN, INT_MAX, A|V|D, "er"},
{"careful", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_CAREFUL }, INT_MIN, INT_MAX, V|D, "er"},
{"compliant", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_COMPLIANT }, INT_MIN, INT_MAX, V|D, "er"},
{"aggressive", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_AGGRESSIVE }, INT_MIN, INT_MAX, V|D, "er"},
#if FF_API_ER
{"very_aggressive", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_VERY_AGGRESSIVE }, INT_MIN, INT_MAX, V|D, "er"},
#endif /* FF_API_ER */
{"explode", "abort decoding on error recognition", 0, AV_OPT_TYPE_CONST, {.dbl = FF_ER_EXPLODE }, INT_MIN, INT_MAX, V|D, "er"},
#endif /* FF_API_ER */
{"err_filter", "set error detection filter flags", OFFSET(err_recognition), AV_OPT_TYPE_FLAGS, {.dbl = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, A|V|D, "err_filter"},
{"crccheck", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, V|D, "err_filter"},
{"bitstream", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_BITSTREAM }, INT_MIN, INT_MAX, V|D, "err_filter"},
{"buffer", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_BUFFER }, INT_MIN, INT_MAX, V|D, "err_filter"},
{"explode", "abort decoding on minor error recognition", 0, AV_OPT_TYPE_CONST, {.dbl = AV_EF_EXPLODE }, INT_MIN, INT_MAX, V|D, "err_filter"},
{"has_b_frames", NULL, OFFSET(has_b_frames), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
{"block_align", NULL, OFFSET(block_align), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},
{"parse_only", NULL, OFFSET(parse_only), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX},

@ -822,7 +822,7 @@ static int twin_decode_frame(AVCodecContext * avctx, void *data,
const ModeTab *mtab = tctx->mtab;
float *out = data;
enum FrameType ftype;
int window_type;
int window_type, out_size;
static const enum FrameType wtype_to_ftype_table[] = {
FT_LONG, FT_LONG, FT_SHORT, FT_LONG,
FT_MEDIUM, FT_LONG, FT_LONG, FT_MEDIUM, FT_MEDIUM
@ -831,8 +831,14 @@ static int twin_decode_frame(AVCodecContext * avctx, void *data,
if (buf_size*8 < avctx->bit_rate*mtab->size/avctx->sample_rate + 8) {
av_log(avctx, AV_LOG_ERROR,
"Frame too small (%d bytes). Truncated file?\n", buf_size);
*data_size = 0;
return buf_size;
return AVERROR(EINVAL);
}
out_size = mtab->size * avctx->channels *
av_get_bytes_per_sample(avctx->sample_fmt);
if (*data_size < out_size) {
av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
return AVERROR(EINVAL);
}
init_get_bits(&gb, buf, buf_size * 8);
@ -857,7 +863,7 @@ static int twin_decode_frame(AVCodecContext * avctx, void *data,
return buf_size;
}
*data_size = mtab->size*avctx->channels*4;
*data_size = out_size;
return buf_size;
}

@ -610,6 +610,16 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVD
goto free_and_end;
}
avctx->frame_number = 0;
#if FF_API_ER
av_log(avctx, AV_LOG_DEBUG, "err{or,}_recognition separate: %d; %d\n",
avctx->error_recognition, avctx->err_recognition);
/* FF_ER_CAREFUL (==1) implies AV_EF_CRCCHECK (== (1<<1) - 1),
FF_ER_COMPLIANT (==2) implies AV_EF_{CRCCHECK,BITSTREAM} (== (1<<2) - 1), et cetera */
avctx->err_recognition |= (1<<(avctx->error_recognition-(avctx->error_recognition>=FF_ER_VERY_AGGRESSIVE))) - 1;
av_log(avctx, AV_LOG_DEBUG, "err{or,}_recognition combined: %d; %d\n",
avctx->error_recognition, avctx->err_recognition);
#endif
if (!HAVE_THREADS)
av_log(avctx, AV_LOG_WARNING, "Warning: not compiled with thread support, using thread emulation\n");

@ -930,6 +930,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
if (!v->field_mode || (v->field_mode && !v->numref)) {
valid_count = get_chroma_mv(mvx, mvy, intra, 0, &tx, &ty);
if (!valid_count) {
s->current_picture.f.motion_val[1][s->block_index[0]][0] = 0;
s->current_picture.f.motion_val[1][s->block_index[0]][1] = 0;
v->luma_mv[s->mb_x][0] = v->luma_mv[s->mb_x][1] = 0;
return; //no need to do MC for intra blocks
}
@ -941,6 +943,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
if (dominant)
chroma_ref_type = !v->cur_field_type;
}
s->current_picture.f.motion_val[1][s->block_index[0]][0] = tx;
s->current_picture.f.motion_val[1][s->block_index[0]][1] = ty;
uvmx = (tx + ((tx & 3) == 3)) >> 1;
uvmy = (ty + ((ty & 3) == 3)) >> 1;

@ -44,6 +44,7 @@ SECTION .text
PROLOGUE 0,6,8
movifnidn r0, r0mp
movifnidn r1d, r1m
movifnidn r2d, r2m
movifnidn r4d, r4m
movifnidn r5d, r5m
%endmacro

@ -62,7 +62,7 @@ typedef struct ID3v2ExtraMetaGEOB {
/**
* Detect ID3v2 Header.
* @param buf must be ID3v2_HEADER_SIZE bytes long
* @param magic magic bytes to identify the header, machine byte order.
* @param magic magic bytes to identify the header.
* If in doubt, use ID3v2_DEFAULT_MAGIC.
*/
int ff_id3v2_match(const uint8_t *buf, const char *magic);

@ -23,5 +23,10 @@ LIBAVFORMAT_$MAJOR {
ff_timefilter_new;
ff_timefilter_update;
ff_timefilter_reset;
get_*;
put_*;
udp_set_remote_url;
udp_get_local_port;
init_checksum;
local: *;
};

@ -94,34 +94,29 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
//FIXME remove the usage of scratch buffers.
static void
yuv2yuvX_altivec_real(SwsContext *c,
const int16_t *lumFilter, const int16_t **lumSrc,
int lumFilterSize, const int16_t *chrFilter,
const int16_t **chrUSrc, const int16_t **chrVSrc,
int chrFilterSize, const int16_t **alpSrc,
uint8_t *dest[4], int dstW, int chrDstW)
yuv2planeX_altivec(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset)
{
uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2];
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
register int i, j;
{
DECLARE_ALIGNED(16, int, val)[dstW];
for (i=0; i<dstW; i++)
val[i] = lumDither[i & 7] << 12;
val[i] = dither[(i + offset) & 7] << 12;
for (j = 0; j < lumFilterSize; j++) {
vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter);
vector unsigned char perm, perm0 = vec_lvsl(j << 1, lumFilter);
for (j = 0; j < filterSize; j++) {
vector signed short l1, vLumFilter = vec_ld(j << 1, filter);
vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);
vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
perm = vec_lvsl(0, lumSrc[j]);
l1 = vec_ld(0, lumSrc[j]);
perm = vec_lvsl(0, src[j]);
l1 = vec_ld(0, src[j]);
for (i = 0; i < (dstW - 7); i+=8) {
int offset = i << 2;
vector signed short l2 = vec_ld((i << 1) + 16, lumSrc[j]);
vector signed short l2 = vec_ld((i << 1) + 16, src[j]);
vector signed int v1 = vec_ld(offset, val);
vector signed int v2 = vec_ld(offset + 16, val);
@ -143,73 +138,10 @@ yuv2yuvX_altivec_real(SwsContext *c,
l1 = l2;
}
for ( ; i < dstW; i++) {
val[i] += lumSrc[j][i] * lumFilter[j];
val[i] += src[j][i] * filter[j];
}
}
altivec_packIntArrayToCharArray(val, yDest, dstW);
}
if (uDest != 0) {
DECLARE_ALIGNED(16, int, u)[chrDstW];
DECLARE_ALIGNED(16, int, v)[chrDstW];
for (i=0; i<chrDstW; i++) {
u[i] = chrDither[i & 7] << 12;
v[i] = chrDither[(i + 3) & 7] << 12;
}
for (j = 0; j < chrFilterSize; j++) {
vector signed short l1, l1_V, vChrFilter = vec_ld(j << 1, chrFilter);
vector unsigned char perm, perm0 = vec_lvsl(j << 1, chrFilter);
vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
perm = vec_lvsl(0, chrUSrc[j]);
l1 = vec_ld(0, chrUSrc[j]);
l1_V = vec_ld(0, chrVSrc[j]);
for (i = 0; i < (chrDstW - 7); i+=8) {
int offset = i << 2;
vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]);
vector signed int v1 = vec_ld(offset, u);
vector signed int v2 = vec_ld(offset + 16, u);
vector signed int v1_V = vec_ld(offset, v);
vector signed int v2_V = vec_ld(offset + 16, v);
vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7]
vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i]
vector signed int i1 = vec_mule(vChrFilter, ls);
vector signed int i2 = vec_mulo(vChrFilter, ls);
vector signed int i1_V = vec_mule(vChrFilter, ls_V);
vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
vector signed int vf1 = vec_mergeh(i1, i2);
vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]
vector signed int vo1 = vec_add(v1, vf1);
vector signed int vo2 = vec_add(v2, vf2);
vector signed int vo1_V = vec_add(v1_V, vf1_V);
vector signed int vo2_V = vec_add(v2_V, vf2_V);
vec_st(vo1, offset, u);
vec_st(vo2, offset + 16, u);
vec_st(vo1_V, offset, v);
vec_st(vo2_V, offset + 16, v);
l1 = l2;
l1_V = l2_V;
}
for ( ; i < chrDstW; i++) {
u[i] += chrUSrc[j][i] * chrFilter[j];
v[i] += chrVSrc[j][i] * chrFilter[j];
}
}
altivec_packIntArrayToCharArray(u, uDest, chrDstW);
altivec_packIntArrayToCharArray(v, vDest, chrDstW);
altivec_packIntArrayToCharArray(val, dest, dstW);
}
}
@ -405,7 +337,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 &&
!c->alpPixBuf) {
c->yuv2yuvX = yuv2yuvX_altivec_real;
c->yuv2planeX = yuv2planeX_altivec;
}
/* The following list of supported dstFormat values should

@ -290,252 +290,150 @@ const uint16_t dither_scale[15][16]={
{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
};
#define output_pixel(pos, val, bias, signedness) \
if (big_endian) { \
AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
} else { \
AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
}
static av_always_inline void
yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
int lumFilterSize, const int16_t *chrFilter,
const int32_t **chrUSrc, const int32_t **chrVSrc,
int chrFilterSize, const int32_t **alpSrc,
uint16_t *dest[4], int dstW, int chrDstW,
int big_endian, int output_bits)
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
int big_endian, int output_bits)
{
//FIXME Optimize (just quickly written not optimized..)
int i;
int dword= output_bits == 16;
uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
int shift = 11 + 4*dword + 16 - output_bits - 1;
int shift = 19 - output_bits;
#define output_pixel(pos, val) \
if (big_endian) { \
AV_WB16(pos, av_clip_uint16(val >> shift)); \
} else { \
AV_WL16(pos, av_clip_uint16(val >> shift)); \
for (i = 0; i < dstW; i++) {
int val = src[i] + (1 << (shift - 1));
output_pixel(&dest[i], val, 0, uint);
}
}
static av_always_inline void
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
const int32_t **src, uint16_t *dest, int dstW,
int big_endian, int output_bits)
{
int i;
int dword= output_bits == 16;
int shift = 15 + 16 - output_bits;
for (i = 0; i < dstW; i++) {
int val = 1 << (26-output_bits + 4*dword - 1);
int val = 1 << (26-output_bits + 4*dword);
int j;
for (j = 0; j < lumFilterSize; j++)
val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
/* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
* filters (or anything with negative coeffs), the range can be slightly
* wider in both directions. To account for this overflow, we subtract
* a constant so it always fits in the signed range (assuming a
* reasonable filterSize), and re-add that at the end. */
val -= 0x40000000;
for (j = 0; j < filterSize; j++)
val += src[j][i] * filter[j];
output_pixel(&yDest[i], val);
output_pixel(&dest[i], val, 0x8000, int);
}
}
if (uDest) {
for (i = 0; i < chrDstW; i++) {
int u = 1 << (26-output_bits + 4*dword - 1);
int v = 1 << (26-output_bits + 4*dword - 1);
int j;
for (j = 0; j < chrFilterSize; j++) {
u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
}
#undef output_pixel
output_pixel(&uDest[i], u);
output_pixel(&vDest[i], v);
}
#define output_pixel(pos, val) \
if (big_endian) { \
AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
} else { \
AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
}
if (CONFIG_SWSCALE_ALPHA && aDest) {
for (i = 0; i < dstW; i++) {
int val = 1 << (26-output_bits + 4*dword - 1);
int j;
for (j = 0; j < lumFilterSize; j++)
val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
static av_always_inline void
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
int big_endian, int output_bits)
{
int i;
int shift = 15 - output_bits;
output_pixel(&aDest[i], val);
}
for (i = 0; i < dstW; i++) {
int val = src[i] + (1 << (shift - 1));
output_pixel(&dest[i], val);
}
#undef output_pixel
}
static av_always_inline void
yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
int lumFilterSize, const int16_t *chrFilter,
const int16_t **chrUSrc, const int16_t **chrVSrc,
int chrFilterSize, const int16_t **alpSrc,
uint16_t *dest[4], int dstW, int chrDstW,
int big_endian, int output_bits)
yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
const int16_t **src, uint16_t *dest, int dstW,
int big_endian, int output_bits)
{
//FIXME Optimize (just quickly written not optimized..)
int i;
uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
int shift = 11 + 16 - output_bits;
#define output_pixel(pos, val) \
if (big_endian) { \
AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
} else { \
AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
}
for (i = 0; i < dstW; i++) {
int val = 1 << (26-output_bits);
int j;
for (j = 0; j < lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j];
for (j = 0; j < filterSize; j++)
val += src[j][i] * filter[j];
output_pixel(&yDest[i], val);
output_pixel(&dest[i], val);
}
}
if (uDest) {
for (i = 0; i < chrDstW; i++) {
int u = 1 << (26-output_bits);
int v = 1 << (26-output_bits);
int j;
for (j = 0; j < chrFilterSize; j++) {
u += chrUSrc[j][i] * chrFilter[j];
v += chrVSrc[j][i] * chrFilter[j];
}
output_pixel(&uDest[i], u);
output_pixel(&vDest[i], v);
}
}
if (CONFIG_SWSCALE_ALPHA && aDest) {
for (i = 0; i < dstW; i++) {
int val = 1 << (26-output_bits);
int j;
for (j = 0; j < lumFilterSize; j++)
val += alpSrc[j][i] * lumFilter[j];
output_pixel(&aDest[i], val);
}
}
#undef output_pixel
}
#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
const int16_t **_lumSrc, int lumFilterSize, \
const int16_t *chrFilter, const int16_t **_chrUSrc, \
const int16_t **_chrVSrc, \
int chrFilterSize, const int16_t **_alpSrc, \
uint8_t *_dest[4], int dstW, int chrDstW) \
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
uint8_t *dest, int dstW, \
const uint8_t *dither, int offset)\
{ \
yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
(uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
const int16_t **src, uint8_t *dest, int dstW, \
const uint8_t *dither, int offset)\
{ \
const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
**chrUSrc = (const typeX_t **) _chrUSrc, \
**chrVSrc = (const typeX_t **) _chrVSrc, \
**alpSrc = (const typeX_t **) _alpSrc; \
yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
alpSrc, (uint16_t **) _dest, \
dstW, chrDstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc,
int chrFilterSize, const int16_t **alpSrc,
uint8_t *dest[4], int dstW, int chrDstW)
{
uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
int i;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
//FIXME Optimize (just quickly written not optimized..)
yuv2planeX_## template_size ## _c_template(filter, \
filterSize, (const typeX_t **) src, \
(uint16_t *) dest, dstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1, 10, int16_t);
yuv2NBPS( 9, LE, 0, 10, int16_t);
yuv2NBPS(10, BE, 1, 10, int16_t);
yuv2NBPS(10, LE, 0, 10, int16_t);
yuv2NBPS(16, BE, 1, 16, int32_t);
yuv2NBPS(16, LE, 0, 16, int32_t);
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset)
{
int i;
for (i=0; i<dstW; i++) {
int val = lumDither[i & 7] << 12;
int val = dither[(i + offset) & 7] << 12;
int j;
for (j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j];
for (j=0; j<filterSize; j++)
val += src[j][i] * filter[j];
yDest[i]= av_clip_uint8(val>>19);
dest[i]= av_clip_uint8(val>>19);
}
if (uDest)
for (i=0; i<chrDstW; i++) {
int u = chrDither[i & 7] << 12;
int v = chrDither[(i + 3) & 7] << 12;
int j;
for (j=0; j<chrFilterSize; j++) {
u += chrUSrc[j][i] * chrFilter[j];
v += chrVSrc[j][i] * chrFilter[j];
}
uDest[i]= av_clip_uint8(u>>19);
vDest[i]= av_clip_uint8(v>>19);
}
if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++) {
int val = lumDither[i & 7] << 12;
int j;
for (j=0; j<lumFilterSize; j++)
val += alpSrc[j][i] * lumFilter[j];
aDest[i]= av_clip_uint8(val>>19);
}
}
static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
const int16_t *chrUSrc, const int16_t *chrVSrc,
const int16_t *alpSrc,
uint8_t *dest[4], int dstW, int chrDstW)
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset)
{
uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
*aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
int i;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
for (i=0; i<dstW; i++) {
int val = (lumSrc[i]+ lumDither[i & 7]) >> 7;
yDest[i]= av_clip_uint8(val);
int val = (src[i] + dither[(i + offset) & 7]) >> 7;
dest[i]= av_clip_uint8(val);
}
if (uDest)
for (i=0; i<chrDstW; i++) {
int u = (chrUSrc[i] + chrDither[i & 7]) >> 7;
int v = (chrVSrc[i] + chrDither[(i + 3) & 7]) >> 7;
uDest[i]= av_clip_uint8(u);
vDest[i]= av_clip_uint8(v);
}
if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++) {
int val = (alpSrc[i] + lumDither[i & 7]) >> 7;
aDest[i]= av_clip_uint8(val);
}
}
static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest[4],
int dstW, int chrDstW)
static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
const int16_t **chrUSrc, const int16_t **chrVSrc,
uint8_t *dest, int chrDstW)
{
uint8_t *yDest = dest[0], *uDest = dest[1];
enum PixelFormat dstFormat = c->dstFormat;
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
//FIXME Optimize (just quickly written not optimized..)
const uint8_t *chrDither = c->chrDither8;
int i;
for (i=0; i<dstW; i++) {
int val = lumDither[i & 7] << 12;
int j;
for (j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j];
yDest[i]= av_clip_uint8(val>>19);
}
if (!uDest)
return;
if (dstFormat == PIX_FMT_NV12)
for (i=0; i<chrDstW; i++) {
@ -547,8 +445,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
v += chrVSrc[j][i] * chrFilter[j];
}
uDest[2*i]= av_clip_uint8(u>>19);
uDest[2*i+1]= av_clip_uint8(v>>19);
dest[2*i]= av_clip_uint8(u>>19);
dest[2*i+1]= av_clip_uint8(v>>19);
}
else
for (i=0; i<chrDstW; i++) {
@ -560,8 +458,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
v += chrVSrc[j][i] * chrFilter[j];
}
uDest[2*i]= av_clip_uint8(v>>19);
uDest[2*i+1]= av_clip_uint8(u>>19);
dest[2*i]= av_clip_uint8(v>>19);
dest[2*i+1]= av_clip_uint8(u>>19);
}
}
@ -2310,26 +2208,31 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
yuv2interleavedX_fn *yuv2nv12cX,
yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
yuv2packedX_fn *yuv2packedX)
{
enum PixelFormat dstFormat = c->dstFormat;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
*yuv2yuvX = yuv2nv12X_c;
} else if (is16BPS(dstFormat)) {
*yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
if (is16BPS(dstFormat)) {
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
} else if (is9_OR_10BPS(dstFormat)) {
if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
*yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
} else {
*yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
}
} else {
*yuv2yuv1 = yuv2yuv1_c;
*yuv2yuvX = yuv2yuvX_c;
*yuv2plane1 = yuv2plane1_8_c;
*yuv2planeX = yuv2planeX_8_c;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
*yuv2nv12cX = yuv2nv12cX_c;
}
if(c->flags & SWS_FULL_CHR_H_INT) {
switch (dstFormat) {
case PIX_FMT_RGBA:
@ -2591,10 +2494,11 @@ static int swScale(SwsContext *c, const uint8_t* src[],
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
int lastDstY;
uint32_t *pal=c->pal_yuv;
int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
yuv2planarX_fn yuv2planeX = c->yuv2planeX;
yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
yuv2packedX_fn yuv2packedX = c->yuv2packedX;
@ -2748,9 +2652,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
}
if (dstY >= dstH-2) {
// hmm looks like we can't use MMX here without overwriting this array's tail
find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
&yuv2packed1, &yuv2packed2,
&yuv2packedX);
find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
&yuv2packed1, &yuv2packed2, &yuv2packedX);
}
{
@ -2761,18 +2664,35 @@ static int swScale(SwsContext *c, const uint8_t* src[],
if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat))
dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
yuv2yuv1(c, lumSrcPtr[0], chrUSrcPtr[0], chrVSrcPtr[0], alpBuf,
dest, dstW, chrDstW);
} else { //General YV12
yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
lumSrcPtr, vLumFilterSize,
vChrFilter + chrDstY * vChrFilterSize,
chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, chrDstW);
if (vLumFilterSize == 1) {
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
} else {
yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
}
if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
if (yuv2nv12cX) {
yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
} else if (vChrFilterSize == 1) {
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
} else {
yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
}
}
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
if (vLumFilterSize == 1) {
yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
} else {
yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
}
}
} else {
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
@ -2826,8 +2746,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
{
enum PixelFormat srcFormat = c->srcFormat;
find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
&c->yuv2packed1, &c->yuv2packed2,
find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
&c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
&c->yuv2packedX);
c->chrToYV12 = NULL;

@ -61,56 +61,58 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t* dst[], int dstStride[]);
/**
* Write one line of horizontally scaled Y/U/V/A to planar output
* Write one line of horizontally scaled data to planar output
* without any additional vertical scaling (or point-scaling).
*
* @param c SWS scaling context
* @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
* @param src scaled source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit
* @param dest pointer to the output plane. For >8bit
* output, this is in uint16_t
* @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels
* @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
* @param dstW width of destination in pixels
* @param dither ordered dither array of type uint8_t and size 8
* @param offset Dither offset
*/
typedef void (*yuv2planar1_fn) (struct SwsContext *c,
const int16_t *lumSrc, const int16_t *chrUSrc,
const int16_t *chrVSrc, const int16_t *alpSrc,
uint8_t *dest[4], int dstW, int chrDstW);
typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
/**
* Write one line of horizontally scaled Y/U/V/A to planar output
* Write one line of horizontally scaled data to planar output
* with multi-point vertical scaling between input pixels.
*
* @param c SWS scaling context
* @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096]
* @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output,
* @param filter vertical luma/alpha scaling coefficients, 12bit [0,4096]
* @param src scaled luma (Y) or alpha (A) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param lumFilterSize number of vertical luma/alpha input lines to scale
* @param filterSize number of vertical input lines to scale
* @param dest pointer to output plane. For >8bit
* output, this is in uint16_t
* @param dstW width of destination pixels
* @param dither ordered dither array of type uint8_t and size 8
* @param offset Dither offset
*/
typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
/**
* Write one line of horizontally scaled chroma to interleaved output
* with multi-point vertical scaling between input pixels.
*
* @param c SWS scaling context
* @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096]
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param chrFilterSize number of vertical chroma input lines to scale
* @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit
* @param dest pointer to the output plane. For >8bit
* output, this is in uint16_t
* @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels
* @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
* @param dstW width of chroma planes
*/
typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest[4],
int dstW, int chrDstW);
typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
const int16_t **chrUSrc, const int16_t **chrVSrc,
uint8_t *dest, int dstW);
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
* output without any additional vertical scaling (or point-scaling). Note
@ -412,8 +414,9 @@ typedef struct SwsContext {
#endif
/* function pointers for swScale() */
yuv2planar1_fn yuv2yuv1;
yuv2planarX_fn yuv2yuvX;
yuv2planar1_fn yuv2plane1;
yuv2planarX_fn yuv2planeX;
yuv2interleavedX_fn yuv2nv12cX;
yuv2packed1_fn yuv2packed1;
yuv2packed2_fn yuv2packed2;
yuv2packedX_fn yuv2packedX;

@ -1,6 +1,7 @@
;******************************************************************************
;* x86-optimized horizontal line scaling functions
;* x86-optimized horizontal/vertical line scaling functions
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;* Kieran Kunhya <kieran@kunhya.com>
;*
;* This file is part of Libav.
;*
@ -28,6 +29,11 @@ max_19bit_int: times 4 dd 0x7ffff
max_19bit_flt: times 4 dd 524287.0
minshort: times 8 dw 0x8000
unicoeff: times 4 dd 0x20000000
yuv2yuvX_16_start: times 4 dd 0x4000 - 0x40000000
yuv2yuvX_10_start: times 4 dd 0x10000
yuv2yuvX_9_start: times 4 dd 0x20000
yuv2yuvX_10_upper: times 8 dw 0x3ff
yuv2yuvX_9_upper: times 8 dw 0x1ff
SECTION .text
@ -429,3 +435,233 @@ INIT_XMM
SCALE_FUNCS2 sse2, 6, 7, 8
SCALE_FUNCS2 ssse3, 6, 6, 8
SCALE_FUNCS2 sse4, 6, 6, 8
;-----------------------------------------------------------------------------
; vertical line scaling
;
; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW,
; const uint8_t *dither, int offset)
; and
; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize,
; const int16_t **src, uint8_t *dst, int dstW,
; const uint8_t *dither, int offset)
;
; Scale one or $filterSize lines of source data to generate one line of output
; data. The input is 15-bit in int16_t if $output_size is [8,10] and 19-bit in
; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
; of 2. $offset is either 0 or 3. $dither holds 8 values.
;-----------------------------------------------------------------------------
; yuv2planeX_fn <opt>, <output_size>, <%3>, <%4>
;
; Emits one yuv2planeX_<output_size>_<opt> function.
;   %1 = instruction-set suffix used in the symbol name (mmx, sse2, ...)
;   %2 = output bit depth (8, 9, 10 or 16); selects the code paths below
;   %3, %4 = forwarded to cglobal as its 4th and 2nd argument (presumably the
;            SIMD register count and the number of arguments to load - confirm
;            against x86inc.asm)
;
; Register roles, following the prototype documented above this macro:
;   r0 = filter, r1 = filterSize, r2 = src, r3 = dst, r4 = dstW,
;   r5 = dither (later reused as the output pixel index), r6 = offset
;   (later reused as a scratch source-line pointer).
%macro yuv2planeX_fn 4
; cntr_reg is the filter-tap counter; it is (re)loaded from the filterSize
; argument (r1m) for every group of output pixels and counts down by 2.
%ifdef ARCH_X86_32
%define cntr_reg r1
%define movsx mov
%else
%define cntr_reg r11
%define movsx movsxd
%endif
cglobal yuv2planeX_%2_%1, %4, 7, %3
%if %2 == 8 || %2 == 9 || %2 == 10
; m6 = 0: used as the zero operand when widening the dither bytes (8-bit)
; and as the lower clamp for pmaxsw (9/10-bit without packusdw)
pxor m6, m6
%endif ; %2 == 8/9/10
%if %2 == 8
%ifdef ARCH_X86_32
; x86-32 keeps the widened dither values in stack scratch space
%assign pad 0x2c - (stack_offset & 15)
SUB rsp, pad
%define m_dith m7
%else ; x86-64
%define m_dith m9
%endif ; x86-32
; create registers holding dither
movq m_dith, [r5] ; dither
; a non-zero offset argument shifts the 8-byte dither pattern by 3 bytes
test r6d, r6d
jz .no_rot
%if mmsize == 16
; duplicate the 8 dither bytes into the upper half so that the byte shift
; below wraps around within the pattern
punpcklqdq m_dith, m_dith
%endif ; mmsize == 16
PALIGNR m_dith, m_dith, 3, m0
.no_rot:
; widen each dither byte to a dword and scale it by << 12, which is the
; starting value of every accumulator in the 8-bit path
%if mmsize == 16
punpcklbw m_dith, m6
%ifdef ARCH_X86_64
punpcklwd m8, m_dith, m6
pslld m8, 12
%else ; x86-32
punpcklwd m5, m_dith, m6
pslld m5, 12
%endif ; x86-32/64
punpckhwd m_dith, m6
pslld m_dith, 12
%ifdef ARCH_X86_32
mova [rsp+ 0], m5
mova [rsp+16], m_dith
%endif
%else ; mmsize == 8
punpcklbw m5, m_dith, m6
punpckhbw m_dith, m6
punpcklwd m4, m5, m6
punpckhwd m5, m6
punpcklwd m3, m_dith, m6
punpckhwd m_dith, m6
pslld m4, 12
pslld m5, 12
pslld m3, 12
pslld m_dith, 12
mova [rsp+ 0], m4
mova [rsp+ 8], m5
mova [rsp+16], m3
mova [rsp+24], m_dith
%endif ; mmsize == 8/16
%endif ; %2 == 8
; from here on r5 is the output pixel index, starting at 0
xor r5, r5
.pixelloop
%assign %%i 0
; the rep here is for the 8bit output mmx case, where dither covers
; 8 pixels but we can only handle 2 pixels per register, and thus 4
; pixels per iteration. In order to not have to keep track of where
; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
%if %2 == 8
%rep 16/mmsize
%endif ; %2 == 8
; m2/m1 are the two dword accumulators (low/high half of the pixel group);
; they start from the scaled dither (8-bit) or from a per-depth constant
%if %2 == 8
%ifdef ARCH_X86_32
mova m2, [rsp+mmsize*(0+%%i)]
mova m1, [rsp+mmsize*(1+%%i)]
%else ; x86-64
mova m2, m8
mova m1, m_dith
%endif ; x86-32/64
%else ; %2 == 9/10/16
mova m1, [yuv2yuvX_%2_start]
mova m2, m1
%endif ; %2 == 8/9/10/16
movsx cntr_reg, r1m
; each pass of the filter loop consumes two source lines and two
; coefficients, walking from the end of the filter towards its start
.filterloop_ %+ %%i
; input pixels
; r6 = src[cntr_reg - 2]
mov r6, [r2+gprsize*cntr_reg-2*gprsize]
%if %2 == 16
mova m3, [r6+r5*4]
mova m5, [r6+r5*4+mmsize]
%else ; %2 == 8/9/10
mova m3, [r6+r5*2]
%endif ; %2 == 8/9/10/16
; r6 = src[cntr_reg - 1]
mov r6, [r2+gprsize*cntr_reg-gprsize]
%if %2 == 16
mova m4, [r6+r5*4]
mova m6, [r6+r5*4+mmsize]
%else ; %2 == 8/9/10
mova m4, [r6+r5*2]
%endif ; %2 == 8/9/10/16
; coefficients
movd m0, [r0+2*cntr_reg-4]; coeff[0], coeff[1]
%if %2 == 16
; 32-bit samples: broadcast each coefficient, sign-extend it to dwords and
; multiply/accumulate the two lines separately
pshuflw m7, m0, 0 ; coeff[0]
pshuflw m0, m0, 0x55 ; coeff[1]
pmovsxwd m7, m7 ; word -> dword
pmovsxwd m0, m0 ; word -> dword
pmulld m3, m7
pmulld m5, m7
pmulld m4, m0
pmulld m6, m0
paddd m2, m3
paddd m1, m5
paddd m2, m4
paddd m1, m6
%else ; %2 == 10/9/8
; 16-bit samples: interleave the two lines so that one pmaddwd computes
; line0*coeff[0] + line1*coeff[1] per pixel
punpcklwd m5, m3, m4
punpckhwd m3, m4
SPLATD m0, m0
pmaddwd m5, m0
pmaddwd m3, m0
paddd m2, m5
paddd m1, m3
%endif ; %2 == 8/9/10/16
sub cntr_reg, 2
jg .filterloop_ %+ %%i
; scale the accumulators down to the output bit depth
%if %2 == 16
psrad m2, 31 - %2
psrad m1, 31 - %2
%else ; %2 == 10/9/8
psrad m2, 27 - %2
psrad m1, 27 - %2
%endif ; %2 == 8/9/10/16
%if %2 == 8
; saturate dwords -> words -> unsigned bytes and store mmsize/2 pixels
packssdw m2, m1
packuswb m2, m2
movh [r3+r5*1], m2
%else ; %2 == 9/10/16
%if %2 == 16
; the 16-bit start constant carries a -0x40000000 bias (-0x8000 after the
; shift by 15), so the signed pack saturates correctly; adding minshort
; (0x8000 per word) afterwards removes the bias again
packssdw m2, m1
paddw m2, [minshort]
%else ; %2 == 9/10
%ifidn %1, sse4
packusdw m2, m1
%elifidn %1, avx
packusdw m2, m1
%else ; mmx2/sse2
; no unsigned dword pack here: pack signed, then clamp negatives to 0 (m6)
packssdw m2, m1
pmaxsw m2, m6
%endif ; mmx2/sse2/sse4/avx
; clamp to the maximum value of the output depth
pminsw m2, [yuv2yuvX_%2_upper]
%endif ; %2 == 9/10/16
mova [r3+r5*2], m2
%endif ; %2 == 8/9/10/16
; advance by the mmsize/2 pixels just written; r4d counts remaining pixels
add r5, mmsize/2
sub r4d, mmsize/2
%if %2 == 8
%assign %%i %%i+2
%endrep
%endif ; %2 == 8
; flags are those of the last "sub r4d" above
jg .pixelloop
%if %2 == 8
%ifdef ARCH_X86_32
; release the dither scratch space reserved in the prologue
ADD rsp, pad
RET
%else ; x86-64
REP_RET
%endif ; x86-32/64
%else ; %2 == 9/10/16
REP_RET
%endif ; %2 == 8/9/10/16
%endmacro
%define PALIGNR PALIGNR_MMX
%ifdef ARCH_X86_32
INIT_MMX
yuv2planeX_fn mmx, 8, 0, 7
yuv2planeX_fn mmx2, 9, 0, 5
yuv2planeX_fn mmx2, 10, 0, 5
%endif
INIT_XMM
yuv2planeX_fn sse2, 8, 10, 7
yuv2planeX_fn sse2, 9, 7, 5
yuv2planeX_fn sse2, 10, 7, 5
%define PALIGNR PALIGNR_SSSE3
yuv2planeX_fn sse4, 8, 10, 7
yuv2planeX_fn sse4, 9, 7, 5
yuv2planeX_fn sse4, 10, 7, 5
yuv2planeX_fn sse4, 16, 8, 5
INIT_AVX
yuv2planeX_fn avx, 8, 10, 7
yuv2planeX_fn avx, 9, 7, 5
yuv2planeX_fn avx, 10, 7, 5

@ -213,6 +213,23 @@ SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);
/*
 * Prototypes for the externally implemented vertical scalers
 * ff_yuv2planeX_<bits>_<isa>(). All of them share one signature.
 *
 * VSCALEX_FUNC declares a single function; VSCALEX_FUNCS declares the
 * 8-bit variant for one instruction set and the 9- and 10-bit variants
 * for a (possibly different) second one.
 */
#define VSCALEX_FUNC(bits, isa) \
    extern void ff_yuv2planeX_ ## bits ## _ ## isa( \
        const int16_t *filter, int filterSize, \
        const int16_t **src, uint8_t *dest, int dstW, \
        const uint8_t *dither, int offset)

#define VSCALEX_FUNCS(isa8, isa9_10) \
    VSCALEX_FUNC(8,  isa8);    \
    VSCALEX_FUNC(9,  isa9_10); \
    VSCALEX_FUNC(10, isa9_10)

/* The MMX/MMX2 variants are only declared for 32-bit x86 builds. */
#if ARCH_X86_32
VSCALEX_FUNCS(mmx, mmx2);
#endif
VSCALEX_FUNCS(sse2, sse2);
VSCALEX_FUNCS(sse4, sse4);
VSCALEX_FUNC(16, sse4); /* 16-bit output is only declared for SSE4 */
VSCALEX_FUNCS(avx, avx);
void ff_sws_init_swScale_mmx(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
@ -249,10 +266,18 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
}
/*
 * Select the vertical scaler matching the destination bit depth.
 *
 * Expects a context pointer named `c` in the expanding scope.
 *
 * vscalefn    lvalue receiving the chosen function
 * opt1        instruction-set suffix of the 8-bit implementation
 * opt2        instruction-set suffix of the 9/10-bit implementations
 * opt2chk     extra condition that must hold before an opt2 function is
 *             used; parenthesized in the expansion so that an argument
 *             containing || cannot bypass the !isBE() guard
 * do_16_case  statement executed for 16-bit output (may be empty)
 *
 * The 9/10-bit functions are only selected for non-big-endian destination
 * formats; otherwise vscalefn is left untouched. Every depth other than
 * 9, 10 and 16 falls through to the 8-bit function.
 */
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt1, opt2, opt2chk, do_16_case) \
switch(c->dstBpc){ \
case 16: do_16_case; break; \
case 10: if (!isBE(c->dstFormat) && (opt2chk)) vscalefn = ff_yuv2planeX_10_ ## opt2; break; \
case 9: if (!isBE(c->dstFormat) && (opt2chk)) vscalefn = ff_yuv2planeX_9_ ## opt2; break; \
default: vscalefn = ff_yuv2planeX_8_ ## opt1; break; \
}
#if ARCH_X86_32
if (cpu_flags & AV_CPU_FLAG_MMX) {
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2,);
}
#endif
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
@ -266,6 +291,7 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
if (cpu_flags & AV_CPU_FLAG_SSE2) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, sse2, 1,);
}
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
@ -275,6 +301,12 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
/* Xto15 don't need special sse4 functions */
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4, sse4, 1,
if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4);
}
if (cpu_flags & AV_CPU_FLAG_AVX) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, avx, 1,);
}
#endif
}

@ -35,41 +35,6 @@
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
#define YSCALEYUV2YV12X(offset, dest, end, pos) \
__asm__ volatile(\
"movq "DITHER16"+0(%0), %%mm3 \n\t"\
"movq "DITHER16"+8(%0), %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t" /* FIXME Unroll? */\
"1: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\
"movq 8(%%"REG_S", %3, 2), %%mm5 \n\t" /* srcData */\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
" jnz 1b \n\t"\
"psraw $3, %%mm3 \n\t"\
"psraw $3, %%mm4 \n\t"\
"packuswb %%mm4, %%mm3 \n\t"\
MOVNTQ(%%mm3, (%1, %3))\
"add $8, %3 \n\t"\
"cmp %2, %3 \n\t"\
"movq "DITHER16"+0(%0), %%mm3 \n\t"\
"movq "DITHER16"+8(%0), %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
: "%"REG_d, "%"REG_S\
);
#if !COMPILE_TEMPLATE_MMX2
static av_always_inline void
dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
@ -106,170 +71,6 @@ dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
}
#endif
/**
 * Vertically scale one output line of planar YUV(A) to 8-bit using the
 * YSCALEYUV2YV12X kernel.
 *
 * The chroma planes are written only when dest[1] is non-NULL: U with the
 * chroma dither at rotation 0, V with rotation 1. The V plane is addressed
 * as (dest[2] - uv_off) starting at index uv_off, where
 * uv_off = c->uv_offx2 >> 1. Luma dither is then set up once and shared by
 * the alpha plane (when enabled and present) and the luma plane.
 *
 * The filter and source arguments are not referenced directly here; the
 * kernel macro takes the filter data from offsets relative to the context.
 */
static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                             const int16_t **lumSrc, int lumFilterSize,
                             const int16_t *chrFilter, const int16_t **chrUSrc,
                             const int16_t **chrVSrc,
                             int chrFilterSize, const int16_t **alpSrc,
                             uint8_t *dest[4], int dstW, int chrDstW)
{
    const uint8_t *luma_dither   = c->lumDither8;
    const uint8_t *chroma_dither = c->chrDither8;
    uint8_t *luma_out  = dest[0];
    uint8_t *cb_out    = dest[1];
    uint8_t *cr_out    = dest[2];
    uint8_t *alpha_out = NULL;

    if (CONFIG_SWSCALE_ALPHA)
        alpha_out = dest[3];

    /* chroma: U first, then V with the rotated dither pattern */
    if (cb_out) {
        x86_reg v_shift = c->uv_offx2 >> 1;

        dither_8to16(c, chroma_dither, 0);
        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, cb_out, chrDstW, 0)

        dither_8to16(c, chroma_dither, 1);
        YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, cr_out - v_shift, chrDstW + v_shift, v_shift)
    }

    /* alpha and luma share the same dither setup */
    dither_8to16(c, luma_dither, 0);
    if (CONFIG_SWSCALE_ALPHA && alpha_out) {
        YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, alpha_out, dstW, 0)
    }

    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, luma_out, dstW, 0)
}
#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
__asm__ volatile(\
"lea " offset "(%0), %%"REG_d" \n\t"\
"movq "DITHER32"+0(%0), %%mm4 \n\t"\
"movq "DITHER32"+8(%0), %%mm5 \n\t"\
"movq "DITHER32"+16(%0), %%mm6 \n\t"\
"movq "DITHER32"+24(%0), %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%%"REG_S", %3, 2), %%mm0 \n\t" /* srcData */\
"movq 8(%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
"movq (%%"REG_S", %3, 2), %%mm1 \n\t" /* srcData */\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\
"movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq 8(%%"REG_S", %3, 2), %%mm3 \n\t" /* srcData */\
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm2 \n\t"\
"pmaddwd %%mm1, %%mm0 \n\t"\
"paddd %%mm2, %%mm6 \n\t"\
"paddd %%mm0, %%mm7 \n\t"\
" jnz 1b \n\t"\
"psrad $19, %%mm4 \n\t"\
"psrad $19, %%mm5 \n\t"\
"psrad $19, %%mm6 \n\t"\
"psrad $19, %%mm7 \n\t"\
"packssdw %%mm5, %%mm4 \n\t"\
"packssdw %%mm7, %%mm6 \n\t"\
"packuswb %%mm6, %%mm4 \n\t"\
MOVNTQ(%%mm4, (%1, %3))\
"add $8, %3 \n\t"\
"cmp %2, %3 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"movq "DITHER32"+0(%0), %%mm4 \n\t"\
"movq "DITHER32"+8(%0), %%mm5 \n\t"\
"movq "DITHER32"+16(%0), %%mm6 \n\t"\
"movq "DITHER32"+24(%0), %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#if !COMPILE_TEMPLATE_MMX2
/**
 * Expand an 8-entry byte dither pattern into eight 32-bit values, each
 * shifted left by 12, and store them at DITHER32+0..+31 relative to
 * &c->redDither.
 *
 * @param c         context whose DITHER32 area is overwritten
 * @param srcDither 8 dither bytes to expand
 * @param rot       when non-zero, the 8 bytes are first rotated by 3 byte
 *                  positions (64-bit rotate right by 24 bits), so output
 *                  entry i comes from srcDither[(i + 3) & 7]
 *
 * Both branches are identical except for the rotation prologue. The asm
 * statements declare no clobbers although they write mm0 and mm4..mm7 and
 * store to memory.
 */
static av_always_inline void
dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot)
{
if (rot) {
__asm__ volatile("pxor %%mm0, %%mm0\n\t"
"movq (%0), %%mm4\n\t"
/* rotate the qword right by 24 bits: (x >> 24) | (x << 40) */
"movq %%mm4, %%mm5\n\t"
"psrlq $24, %%mm4\n\t"
"psllq $40, %%mm5\n\t"
"por %%mm5, %%mm4\n\t"
/* zero-extend bytes -> words -> dwords (mm0 is zero) */
"movq %%mm4, %%mm6\n\t"
"punpcklbw %%mm0, %%mm4\n\t"
"punpckhbw %%mm0, %%mm6\n\t"
"movq %%mm4, %%mm5\n\t"
"movq %%mm6, %%mm7\n\t"
"punpcklwd %%mm0, %%mm4\n\t"
"punpckhwd %%mm0, %%mm5\n\t"
"punpcklwd %%mm0, %%mm6\n\t"
"punpckhwd %%mm0, %%mm7\n\t"
"pslld $12, %%mm4\n\t"
"pslld $12, %%mm5\n\t"
"pslld $12, %%mm6\n\t"
"pslld $12, %%mm7\n\t"
"movq %%mm4, "DITHER32"+0(%1)\n\t"
"movq %%mm5, "DITHER32"+8(%1)\n\t"
"movq %%mm6, "DITHER32"+16(%1)\n\t"
"movq %%mm7, "DITHER32"+24(%1)\n\t"
:: "r"(srcDither), "r"(&c->redDither)
);
} else {
/* same expansion without the rotation */
__asm__ volatile("pxor %%mm0, %%mm0\n\t"
"movq (%0), %%mm4\n\t"
"movq %%mm4, %%mm6\n\t"
"punpcklbw %%mm0, %%mm4\n\t"
"punpckhbw %%mm0, %%mm6\n\t"
"movq %%mm4, %%mm5\n\t"
"movq %%mm6, %%mm7\n\t"
"punpcklwd %%mm0, %%mm4\n\t"
"punpckhwd %%mm0, %%mm5\n\t"
"punpcklwd %%mm0, %%mm6\n\t"
"punpckhwd %%mm0, %%mm7\n\t"
"pslld $12, %%mm4\n\t"
"pslld $12, %%mm5\n\t"
"pslld $12, %%mm6\n\t"
"pslld $12, %%mm7\n\t"
"movq %%mm4, "DITHER32"+0(%1)\n\t"
"movq %%mm5, "DITHER32"+8(%1)\n\t"
"movq %%mm6, "DITHER32"+16(%1)\n\t"
"movq %%mm7, "DITHER32"+24(%1)\n\t"
:: "r"(srcDither), "r"(&c->redDither)
);
}
}
#endif
/**
 * Vertically scale one output line of planar YUV(A) to 8-bit using the
 * 32-bit accumulating YSCALEYUV2YV12X_ACCURATE kernel.
 *
 * The chroma planes are written only when dest[1] is non-NULL: U with the
 * chroma dither at rotation 0, V with rotation 1. The V plane is addressed
 * as (dest[2] - uv_off) starting at index uv_off, where
 * uv_off = c->uv_offx2 >> 1. Luma dither is then set up once and shared by
 * the alpha plane (when enabled and present) and the luma plane.
 *
 * The filter and source arguments are not referenced directly here; the
 * kernel macro takes the filter data from offsets relative to the context.
 */
static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
                                const int16_t **chrVSrc,
                                int chrFilterSize, const int16_t **alpSrc,
                                uint8_t *dest[4], int dstW, int chrDstW)
{
    const uint8_t *luma_dither   = c->lumDither8;
    const uint8_t *chroma_dither = c->chrDither8;
    uint8_t *luma_out  = dest[0];
    uint8_t *cb_out    = dest[1];
    uint8_t *cr_out    = dest[2];
    uint8_t *alpha_out = NULL;

    if (CONFIG_SWSCALE_ALPHA)
        alpha_out = dest[3];

    /* chroma: U first, then V with the rotated dither pattern */
    if (cb_out) {
        x86_reg v_shift = c->uv_offx2 >> 1;

        dither_8to32(c, chroma_dither, 0);
        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, cb_out, chrDstW, 0)

        dither_8to32(c, chroma_dither, 1);
        YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, cr_out - v_shift, chrDstW + v_shift, v_shift)
    }

    /* alpha and luma share the same dither setup */
    dither_8to32(c, luma_dither, 0);
    if (CONFIG_SWSCALE_ALPHA && alpha_out) {
        YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, alpha_out, dstW, 0)
    }

    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, luma_out, dstW, 0)
}
static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
const int16_t *chrUSrc, const int16_t *chrVSrc,
const int16_t *alpSrc,
@ -2095,8 +1896,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
&& dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND) {
c->yuv2yuv1 = RENAME(yuv2yuv1_ar );
c->yuv2yuvX = RENAME(yuv2yuvX_ar );
//c->yuv2yuv1 = RENAME(yuv2yuv1_ar );
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
@ -2108,9 +1908,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
}
} else {
int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
c->yuv2yuv1 = should_dither ? RENAME(yuv2yuv1_ar ) : RENAME(yuv2yuv1 );
c->yuv2yuvX = RENAME(yuv2yuvX );
//c->yuv2yuv1 = RENAME(yuv2yuv1 );
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;

@ -40,16 +40,16 @@ yuv420p9le 9ed4b1dfabc53fd9e586ff6c4c43af80
yuv422p c9bba4529821d796a6ab09f6a5fd355a
yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p16be 5499502e1c29534a158a1fe60e889f60
yuv422p16le e3d61fde6978591596bc36b914386623
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
yuv444p10le 767179dd82846cf00ee4c340c9c1ab74
yuv444p16be 3ad639fff73e56f3b09dd20c335478d6
yuv444p16le 8a7e66dc91ab7971fd24a9105ff2699b
yuv444p16be 1c6ea2c2f5e539006112ceec3d4e7d90
yuv444p16le 20f86bc2f68d2b3f1f2b48b97b2189f4
yuv444p9be 6ab31f4c12b533ce318ecdff83cdd054
yuv444p9le f0606604a5c08becab6ba500124c4b7c
yuva420p a29884f3f3dfe1e00b961bc17bef3d47

@ -40,16 +40,16 @@ yuv420p9le 9ed4b1dfabc53fd9e586ff6c4c43af80
yuv422p c9bba4529821d796a6ab09f6a5fd355a
yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p16be 5499502e1c29534a158a1fe60e889f60
yuv422p16le e3d61fde6978591596bc36b914386623
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
yuv444p10le 767179dd82846cf00ee4c340c9c1ab74
yuv444p16be 3ad639fff73e56f3b09dd20c335478d6
yuv444p16le 8a7e66dc91ab7971fd24a9105ff2699b
yuv444p16be 1c6ea2c2f5e539006112ceec3d4e7d90
yuv444p16le 20f86bc2f68d2b3f1f2b48b97b2189f4
yuv444p9be 6ab31f4c12b533ce318ecdff83cdd054
yuv444p9le f0606604a5c08becab6ba500124c4b7c
yuva420p a29884f3f3dfe1e00b961bc17bef3d47

@ -33,8 +33,8 @@ yuv422p16be 167e4338811a7d272925a4c6417d60da
yuv422p16le 3359395d5875d581fa1e975013d30114
yuv440p 2472417d980e395ad6843cbb8b633b29
yuv444p 1f151980486848c96bc5585ced99003e
yuv444p16be 5d0c0ea66ab43c0c590d8c2a9256e43f
yuv444p16le 3c0a747c1b64feb0ab8dfba92f92579a
yuv444p16be 1ce8fcd4712d525af983e6179d6a4f9e
yuv444p16le 5f1441e18345aadb3f881dac99c6c08a
yuva420p 7536753dfbc7932560fb50c921369a0e
yuvj420p 21f891093006d42d7683b0e1d773a657
yuvj422p 9a43d474c407590ad8f213880586b45e

@ -29,12 +29,12 @@ yuv420p 2d5c80f9ba2ddd85b2aeda3564cc7d64
yuv420p16be 1c4fa93d0744de3cdc6d34ab55db3fb4
yuv420p16le 92c74f5759068c381e4a066fe7faf2e0
yuv422p 6e728f4eb9eae287c224f396d84be6ea
yuv422p16be 69cf0605496c321546899a8442ee64fb
yuv422p16le f0b443fea72f4b6f462859a73b159664
yuv422p16be a05d43cd62b790087bd37083174557de
yuv422p16le 6954abebcbc62d81068d58d0c62bdd5b
yuv440p a99e2b57ed601f39852715c9d675d0d3
yuv444p 947e47f7bb5fdccc659d19b7df2b6fc3
yuv444p16be bc7d53923cff1d7e98d24540845fb64b
yuv444p16le 5df206a93f85ef8b77f5bdc81d9b0a0b
yuv444p16be 58c012e5ab73b066ef3c2b6411a395f1
yuv444p16le 32c12794e184042a59738ab2de608c8d
yuva420p d83ec0c01498189f179ec574918185f1
yuvj420p df3aaaec3bb157c3bde5f0365af30f4f
yuvj422p d113871528d510a192797af59df9c05c

@ -40,16 +40,16 @@ yuv420p9le 9ed4b1dfabc53fd9e586ff6c4c43af80
yuv422p c9bba4529821d796a6ab09f6a5fd355a
yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p16be 5499502e1c29534a158a1fe60e889f60
yuv422p16le e3d61fde6978591596bc36b914386623
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
yuv444p10le 767179dd82846cf00ee4c340c9c1ab74
yuv444p16be 3ad639fff73e56f3b09dd20c335478d6
yuv444p16le 8a7e66dc91ab7971fd24a9105ff2699b
yuv444p16be 1c6ea2c2f5e539006112ceec3d4e7d90
yuv444p16le 20f86bc2f68d2b3f1f2b48b97b2189f4
yuv444p9be 6ab31f4c12b533ce318ecdff83cdd054
yuv444p9le f0606604a5c08becab6ba500124c4b7c
yuva420p a29884f3f3dfe1e00b961bc17bef3d47

@ -33,23 +33,23 @@ yuv411p 1143e7c5cc28fe0922b051b17733bc4c
yuv420p fdad2d8df8985e3d17e73c71f713cb14
yuv420p10be 6d335e75b553da590135cf8bb999610c
yuv420p10le d510ddbabefd03ef39ec943fcb51b709
yuv420p16be 2a75942af24fbdc1fdfe189c6e7bf589
yuv420p16le c4264d92a7c273967a778f4f5daddbe3
yuv420p16be 31988e9a5d6acacaa710f67bc1172f3a
yuv420p16le f5390ce399f88e0e4e2621ed7833b250
yuv420p9be ec4983b7a949c0472110a7a2c58e278a
yuv420p9le c136dce5913a722eee44ab72cff664b2
yuv422p 918e37701ee7377d16a8a6c119c56a40
yuv422p10be cea7ca6b0e66d6f29539885896c88603
yuv422p10le a10c4a5837547716f13cd61918b145f9
yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c
yuv422p16le 61bfcee8e54465f760164f5a75d40b5e
yuv422p16be e7e34fe9264784763ab6cb406524c0f3
yuv422p16le c435b76b08204dda6908640fb5fd4621
yuv422p9be 82494823944912f73cebc58ad2979bbd
yuv422p9le fc69c8a21f473916a4b4225636b97e06
yuv440p 461503fdb9b90451020aa3b25ddf041c
yuv444p 81b2eba962d12e8d64f003ac56f6faf2
yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9
yuv444p10le 02f0a336e9da062a64df1ba487e102c5
yuv444p16be 2677f3074d255f9dab625e9e2e092ca5
yuv444p16le 65fa92521ef97088599ea83f9508cd5b
yuv444p16be 0da9bed80f5542682ab286f3261cf24c
yuv444p16le a0c5d3c7bf3f181db503cf8e450d1335
yuv444p9be 9ac2643ce7f7e5c4e17c8c9fd8494d4a
yuv444p9le 896a1cc9cccca1ba410dd53942d33cc4
yuva420p 8673a9131fb47de69788863f93a50eb7

@ -40,16 +40,16 @@ yuv420p9le 0f1e371a1374d3cba2205b70cc7cac90
yuv422p d7f5cb44d9b0210d66d6a8762640ab34
yuv422p10be 588fe319b96513c32e21d3e32b45447f
yuv422p10le 11b57f2bd9661024153f3973b9090cdb
yuv422p16be c092d083548c2a144c372a98c46875c7
yuv422p16le c071b9397a416d51cbe339345cbcba84
yuv422p16be 9bd8f8c961822b586fa4cf992be54acc
yuv422p16le 9c4a1239605c7952b736ac3130163f14
yuv422p9be 7c6f1e140b3999ee7d923854e507752a
yuv422p9le 51f10d79c07989060dd06e767e6d7d60
yuv440p 876385e96165acf51271b20e5d85a416
yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7
yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd
yuv444p10le 2d0947ae89ecc6a501eee6832cb27e06
yuv444p16be 6a954614fd2a8ae0df53e4fd76937af8
yuv444p16le 65613965fb58cc4c3cd480a68b6540ea
yuv444p16be de2dedfc6f12073ffead113f86e07ecf
yuv444p16le 8e83323cf102d6c823a03ae8a7b7e033
yuv444p9be 6ac92b7dc9ab2fc59bee99204886899a
yuv444p9le 85aef13a654953d3455d89770b0d74bd
yuva420p c705d1cf061d8c6580ac690b55f92276

Loading…
Cancel
Save