Merge remote-tracking branch 'qatar/master'

* qatar/master:
  mpegvideo_enc: only allocate output packet when we know there will be output
  Add names for more channel layouts to the channel layout map.
  sunrast: Add a sample request for RMT_RAW colormap.
  avcodec: do not override pts or duration from the audio encoder
  Add prores regression test.
  Enable already existing rso regression test.
  Add regression test for "sox" format muxer/demuxer.
  Add dpx encoding regression test.
  swscale: K&R formatting cosmetics for PowerPC code (part I/II)
  img2: Use ff_guess_image2_codec(filename) shorthand where appropriate.
  Clarify licensing information about files borrowed from libjpeg.
  Mark mutable static data const where appropriate.
  avplay: fix -threads option
  dvbsubdec: avoid undefined signed left shift in RGBA macro
  mlpdec: use av_log_ask_for_sample()
  gif: K&R formatting cosmetics
  png: make .long_name more descriptive
  movdec: Adjust keyframe flagging in fragmented files
  rv34: change most "int stride" into "ptrdiff_t stride".

Conflicts:
	avprobe.c
	ffplay.c
	libavcodec/mlpdec.c
	libavcodec/mpegvideo_enc.c
	libavcodec/pngenc.c
	libavcodec/x86/v210-init.c
	libavfilter/vf_boxblur.c
	libavfilter/vf_crop.c
	libavfilter/vf_drawtext.c
	libavfilter/vf_lut.c
	libavfilter/vf_overlay.c
	libavfilter/vf_pad.c
	libavfilter/vf_scale.c
	libavfilter/vf_select.c
	libavfilter/vf_setpts.c
	libavfilter/vf_settb.c
	libavformat/img2.c
	libavutil/audioconvert.c
	tests/codec-regression.sh
	tests/lavf-regression.sh
	tests/ref/lavf/dpx
	tests/ref/vsynth1/prores
	tests/ref/vsynth2/prores

Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/3/merge
Michael Niedermayer 13 years ago
commit 184fc600e1
  1. 8
      LICENSE
  2. 12
      ffprobe.c
  3. 2
      ffserver.c
  4. 2
      libavcodec/aacdec.c
  5. 4
      libavcodec/arm/rv34dsp_init_neon.c
  6. 12
      libavcodec/arm/rv40dsp_init_neon.c
  7. 2
      libavcodec/dvbsubdec.c
  8. 2
      libavcodec/libvpxenc.c
  9. 2
      libavcodec/libxvid_rc.c
  10. 30
      libavcodec/mlpdec.c
  11. 28
      libavcodec/mpegvideo_enc.c
  12. 2
      libavcodec/pngdec.c
  13. 2
      libavcodec/pngenc.c
  14. 4
      libavcodec/rv34dsp.c
  15. 12
      libavcodec/rv34dsp.h
  16. 20
      libavcodec/rv40dsp.c
  17. 6
      libavcodec/sunrast.c
  18. 8
      libavcodec/utils.c
  19. 4
      libavcodec/x86/rv34dsp_init.c
  20. 4
      libavcodec/x86/rv40dsp_init.c
  21. 8
      libavcodec/x86/v210-init.c
  22. 2
      libavfilter/vf_boxblur.c
  23. 2
      libavfilter/vf_crop.c
  24. 6
      libavfilter/vf_drawtext.c
  25. 2
      libavfilter/vf_lut.c
  26. 2
      libavfilter/vf_overlay.c
  27. 2
      libavfilter/vf_pad.c
  28. 2
      libavfilter/vf_scale.c
  29. 2
      libavfilter/vf_select.c
  30. 2
      libavfilter/vf_setpts.c
  31. 2
      libavfilter/vf_settb.c
  32. 149
      libavformat/gif.c
  33. 16
      libavformat/img2.c
  34. 2
      libavformat/matroskadec.c
  35. 13
      libavformat/mov.c
  36. 17
      libavutil/audioconvert.c
  37. 4
      libavutil/eval.c
  38. 444
      libswscale/ppc/swscale_altivec.c
  39. 173
      libswscale/ppc/yuv2yuv_altivec.c
  40. 5
      tests/codec-regression.sh
  41. 4
      tests/ref/vsynth1/prores_kostya
  42. 4
      tests/ref/vsynth2/prores_kostya

@ -20,8 +20,12 @@ Specifically, the GPL parts of FFmpeg are
There are a handful of files under other licensing terms, namely:
* The files libavcodec/jfdctfst.c, libavcodec/jfdctint.c, libavcodec/jrevdct.c
are taken from libjpeg, see the top of the files for licensing details.
* The files libavcodec/jfdctfst.c, libavcodec/jfdctint_template.c and
libavcodec/jrevdct.c are taken from libjpeg, see the top of the files for
licensing details. Specifically note that you must credit the IJG in the
documentation accompanying your program if you only distribute executables.
You must also indicate any changes including additions and deletions to
those three files in the documentation.
Should you, for whatever reason, prefer to use version 3 of the (L)GPL, then
the configure parameter --enable-version3 will activate this licensing option

@ -68,13 +68,13 @@ static const OptionDef options[];
static const char *input_filename;
static AVInputFormat *iformat = NULL;
static const char *binary_unit_prefixes [] = { "", "Ki", "Mi", "Gi", "Ti", "Pi" };
static const char *decimal_unit_prefixes[] = { "", "K" , "M" , "G" , "T" , "P" };
static const char *const binary_unit_prefixes [] = { "", "Ki", "Mi", "Gi", "Ti", "Pi" };
static const char *const decimal_unit_prefixes[] = { "", "K" , "M" , "G" , "T" , "P" };
static const char *unit_second_str = "s" ;
static const char *unit_hertz_str = "Hz" ;
static const char *unit_byte_str = "byte" ;
static const char *unit_bit_per_second_str = "bit/s";
static const char unit_second_str[] = "s" ;
static const char unit_hertz_str[] = "Hz" ;
static const char unit_byte_str[] = "byte" ;
static const char unit_bit_per_second_str[] = "bit/s";
static uint64_t *nb_streams_packets;
static uint64_t *nb_streams_frames;

@ -1872,7 +1872,7 @@ static int http_parse_request(HTTPContext *c)
static void fmt_bytecount(AVIOContext *pb, int64_t count)
{
static const char *suffix = " kMGTP";
static const char suffix[] = " kMGTP";
const char *s;
for (s = suffix; count >= 100000 && s[1]; count /= 1000, s++);

@ -1036,7 +1036,7 @@ static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
int offset[3] = { global_gain, global_gain - 90, 0 };
int clipped_offset;
int noise_flag = 1;
static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
static const char *const sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
for (g = 0; g < ics->num_window_groups; g++) {
for (i = 0; i < ics->max_sfb;) {
int run_end = band_type_run_end[idx];

@ -27,8 +27,8 @@ void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
void ff_rv34_inv_transform_noround_dc_neon(DCTELEM *block);
void ff_rv34_idct_add_neon(uint8_t *dst, int stride, DCTELEM *block);
void ff_rv34_idct_dc_add_neon(uint8_t *dst, int stride, int dc);
void ff_rv34_idct_add_neon(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
void ff_rv34_idct_dc_add_neon(uint8_t *dst, ptrdiff_t stride, int dc);
void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{

@ -51,20 +51,20 @@ void ff_put_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_rv40_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t);
void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, ptrdiff_t);
int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, int stride,
int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride,
int beta, int beta2, int edge,
int *p1, int *q1);
int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, int stride,
int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, ptrdiff_t stride,
int beta, int beta2, int edge,
int *p1, int *q1);
void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, int stride, int filter_p1,
void ff_rv40_h_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1,
int filter_q1, int alpha, int beta,
int lim_p0q0, int lim_q1, int lim_p1);
void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, int stride, int filter_p1,
void ff_rv40_v_weak_loop_filter_neon(uint8_t *src, ptrdiff_t stride, int filter_p1,
int filter_q1, int alpha, int beta,
int lim_p0q0, int lim_q1, int lim_p1);

@ -150,7 +150,7 @@ static void png_save2(const char *filename, uint32_t *bitmap, int w, int h)
}
#endif
#define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
#define RGBA(r,g,b,a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
typedef struct DVBSubCLUT {
int id;

@ -77,7 +77,7 @@ typedef struct VP8EncoderContext {
} VP8Context;
/** String mappings for enum vp8e_enc_control_id */
static const char *ctlidstr[] = {
static const char *const ctlidstr[] = {
[VP8E_UPD_ENTROPY] = "VP8E_UPD_ENTROPY",
[VP8E_UPD_REFERENCE] = "VP8E_UPD_REFERENCE",
[VP8E_USE_REFERENCE] = "VP8E_USE_REFERENCE",

@ -48,7 +48,7 @@ int ff_xvid_rate_control_init(MpegEncContext *s){
}
for(i=0; i<s->rc_context.num_entries; i++){
static const char *frame_types = " ipbs";
static const char frame_types[] = " ipbs";
char tmp[256];
RateControlEntry *rce;

@ -38,12 +38,6 @@
/** number of bits used for VLC lookup - longest Huffman code is 9 */
#define VLC_BITS 9
static const char* sample_message =
"Please file a bug report following the instructions at "
"http://ffmpeg.org/bugreports.html and include "
"a sample of this file.";
typedef struct SubStream {
/// Set if a valid restart header has been read. Otherwise the substream cannot be decoded.
uint8_t restart_seen;
@ -308,10 +302,10 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
return AVERROR_INVALIDDATA;
}
if (mh.num_substreams > MAX_SUBSTREAMS) {
av_log(m->avctx, AV_LOG_ERROR,
av_log_ask_for_sample(m->avctx,
"Number of substreams %d is larger than the maximum supported "
"by the decoder. %s\n", mh.num_substreams, sample_message);
return AVERROR_INVALIDDATA;
"by the decoder.\n", mh.num_substreams);
return AVERROR_PATCHWELCOME;
}
m->access_unit_size = mh.access_unit_size;
@ -410,10 +404,10 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
/* This should happen for TrueHD streams with >6 channels and MLP's noise
* type. It is not yet known if this is allowed. */
if (s->max_channel > MAX_MATRIX_CHANNEL_MLP && !s->noise_type) {
av_log(m->avctx, AV_LOG_ERROR,
av_log_ask_for_sample(m->avctx,
"Number of channels %d is larger than the maximum supported "
"by the decoder. %s\n", s->max_channel+2, sample_message);
return AVERROR_INVALIDDATA;
"by the decoder.\n", s->max_channel + 2);
return AVERROR_PATCHWELCOME;
}
if (s->min_channel > s->max_channel) {
@ -455,10 +449,10 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
for (ch = 0; ch <= s->max_matrix_channel; ch++) {
int ch_assign = get_bits(gbp, 6);
if (ch_assign > s->max_matrix_channel) {
av_log(m->avctx, AV_LOG_ERROR,
"Assignment of matrix channel %d to invalid output channel %d. %s\n",
ch, ch_assign, sample_message);
return AVERROR_INVALIDDATA;
av_log_ask_for_sample(m->avctx,
"Assignment of matrix channel %d to invalid output channel %d.\n",
ch, ch_assign);
return AVERROR_PATCHWELCOME;
}
s->ch_assign[ch_assign] = ch;
}
@ -813,8 +807,8 @@ static int read_block_data(MLPDecodeContext *m, GetBitContext *gbp,
if (s->data_check_present) {
expected_stream_pos = get_bits_count(gbp);
expected_stream_pos += get_bits(gbp, 16);
av_log(m->avctx, AV_LOG_WARNING, "This file contains some features "
"we have not tested yet. %s\n", sample_message);
av_log_ask_for_sample(m->avctx, "This file contains some features "
"we have not tested yet.\n");
}
if (s->blockpos + s->blocksize > m->access_unit_size) {

@ -1395,20 +1395,6 @@ int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
int i, stuffing_count, ret;
int context_count = s->slice_context_count;
if (!pkt->data &&
(ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
return ret;
for (i = 0; i < context_count; i++) {
int start_y = s->thread_context[i]->start_mb_y;
int end_y = s->thread_context[i]-> end_mb_y;
int h = s->mb_height;
uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
uint8_t *end = pkt->data + (size_t)(((int64_t) pkt->size) * end_y / h);
init_put_bits(&s->thread_context[i]->pb, start, end - start);
}
s->picture_in_gop_number++;
if (load_input_picture(s, pic_arg) < 0)
@ -1420,6 +1406,20 @@ int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
/* output? */
if (s->new_picture.f.data[0]) {
if (!pkt->data &&
(ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
return ret;
for (i = 0; i < context_count; i++) {
int start_y = s->thread_context[i]->start_mb_y;
int end_y = s->thread_context[i]-> end_mb_y;
int h = s->mb_height;
uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
uint8_t *end = pkt->data + (size_t)(((int64_t) pkt->size) * end_y / h);
init_put_bits(&s->thread_context[i]->pb, start, end - start);
}
s->pict_type = s->new_picture.f.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality,

@ -732,5 +732,5 @@ AVCodec ff_png_decoder = {
.close = png_dec_end,
.decode = decode_frame,
.capabilities = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
.long_name = NULL_IF_CONFIG_SMALL("PNG image"),
.long_name = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
};

@ -445,5 +445,5 @@ AVCodec ff_png_encoder = {
PIX_FMT_GRAY8, PIX_FMT_GRAY8A,
PIX_FMT_GRAY16BE,
PIX_FMT_MONOBLACK, PIX_FMT_NONE},
.long_name= NULL_IF_CONFIG_SMALL("PNG image"),
.long_name= NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
};

@ -53,7 +53,7 @@ static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block)
* Real Video 3.0/4.0 inverse transform + sample reconstruction
* Code is almost the same as in SVQ3, only scaling is different.
*/
static void rv34_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, DCTELEM *block){
int temp[16];
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int i;
@ -101,7 +101,7 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
}
}
static void rv34_idct_dc_add_c(uint8_t *dst, int stride, int dc)
static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc)
{
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int i, j;

@ -32,24 +32,24 @@
typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
uint8_t *src1/*align width (8 or 16)*/,
uint8_t *src2/*align width (8 or 16)*/,
int w1, int w2, int stride);
int w1, int w2, ptrdiff_t stride);
typedef void (*rv34_inv_transform_func)(DCTELEM *block);
typedef void (*rv34_idct_add_func)(uint8_t *dst, int stride, DCTELEM *block);
typedef void (*rv34_idct_dc_add_func)(uint8_t *dst, int stride,
typedef void (*rv34_idct_add_func)(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
typedef void (*rv34_idct_dc_add_func)(uint8_t *dst, ptrdiff_t stride,
int dc);
typedef void (*rv40_weak_loop_filter_func)(uint8_t *src, int stride,
typedef void (*rv40_weak_loop_filter_func)(uint8_t *src, ptrdiff_t stride,
int filter_p1, int filter_q1,
int alpha, int beta,
int lims, int lim_q1, int lim_p1);
typedef void (*rv40_strong_loop_filter_func)(uint8_t *src, int stride,
typedef void (*rv40_strong_loop_filter_func)(uint8_t *src, ptrdiff_t stride,
int alpha, int lims,
int dmode, int chroma);
typedef int (*rv40_loop_filter_strength_func)(uint8_t *src, int stride,
typedef int (*rv40_loop_filter_strength_func)(uint8_t *src, ptrdiff_t stride,
int beta, int beta2, int edge,
int *p1, int *q1);

@ -278,7 +278,7 @@ RV40_CHROMA_MC(put_, op_put)
RV40_CHROMA_MC(avg_, op_avg)
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\
static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
int i, j;\
\
@ -316,7 +316,7 @@ static const uint8_t rv40_dither_r[16] = {
*/
static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
const int step,
const int stride,
const ptrdiff_t stride,
const int filter_p1,
const int filter_q1,
const int alpha,
@ -362,7 +362,7 @@ static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
}
}
static void rv40_h_weak_loop_filter(uint8_t *src, const int stride,
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
const int filter_p1, const int filter_q1,
const int alpha, const int beta,
const int lim_p0q0, const int lim_q1,
@ -372,7 +372,7 @@ static void rv40_h_weak_loop_filter(uint8_t *src, const int stride,
alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
static void rv40_v_weak_loop_filter(uint8_t *src, const int stride,
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
const int filter_p1, const int filter_q1,
const int alpha, const int beta,
const int lim_p0q0, const int lim_q1,
@ -384,7 +384,7 @@ static void rv40_v_weak_loop_filter(uint8_t *src, const int stride,
static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
const int step,
const int stride,
const ptrdiff_t stride,
const int alpha,
const int lims,
const int dmode,
@ -440,14 +440,14 @@ static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
}
}
static void rv40_h_strong_loop_filter(uint8_t *src, const int stride,
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
const int alpha, const int lims,
const int dmode, const int chroma)
{
rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}
static void rv40_v_strong_loop_filter(uint8_t *src, const int stride,
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
const int alpha, const int lims,
const int dmode, const int chroma)
{
@ -455,7 +455,7 @@ static void rv40_v_strong_loop_filter(uint8_t *src, const int stride,
}
static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
int step, int stride,
int step, ptrdiff_t stride,
int beta, int beta2,
int edge,
int *p1, int *q1)
@ -490,14 +490,14 @@ static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
return strong0 && strong1;
}
static int rv40_h_loop_filter_strength(uint8_t *src, int stride,
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
int beta, int beta2, int edge,
int *p1, int *q1)
{
return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}
static int rv40_v_loop_filter_strength(uint8_t *src, int stride,
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
int beta, int beta2, int edge,
int *p1, int *q1)
{

@ -77,7 +77,11 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
av_log(avctx, AV_LOG_ERROR, "invalid image size\n");
return AVERROR_INVALIDDATA;
}
if (maptype & ~1) {
if (maptype == RMT_RAW) {
av_log_ask_for_sample(avctx, "unsupported colormap type\n");
return AVERROR_PATCHWELCOME;
}
if (maptype > RMT_RAW) {
av_log(avctx, AV_LOG_ERROR, "invalid colormap type\n");
return AVERROR_INVALIDDATA;
}

@ -992,9 +992,11 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
if (!ret && *got_packet_ptr) {
if (!(avctx->codec->capabilities & CODEC_CAP_DELAY)) {
avpkt->pts = frame->pts;
avpkt->duration = ff_samples_to_time_base(avctx,
frame->nb_samples);
if (avpkt->pts == AV_NOPTS_VALUE)
avpkt->pts = frame->pts;
if (!avpkt->duration)
avpkt->duration = ff_samples_to_time_base(avctx,
frame->nb_samples);
}
avpkt->dts = avpkt->pts;
} else {

@ -26,8 +26,8 @@
void ff_rv34_idct_dc_mmx2(DCTELEM *block);
void ff_rv34_idct_dc_noround_mmx2(DCTELEM *block);
void ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
void ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc);
void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
{

@ -42,9 +42,9 @@ void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
#define DECLARE_WEIGHT(opt) \
void ff_rv40_weight_func_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \
int w1, int w2, int stride); \
int w1, int w2, ptrdiff_t stride); \
void ff_rv40_weight_func_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
int w1, int w2, int stride);
int w1, int w2, ptrdiff_t stride);
DECLARE_WEIGHT(mmx)
DECLARE_WEIGHT(sse2)
DECLARE_WEIGHT(ssse3)

@ -1,18 +1,18 @@
/*
* This file is part of Libav.
* This file is part of FFmpeg.
*
* Libav is free software; you can redistribute it and/or
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

@ -30,7 +30,7 @@
#include "libavutil/pixdesc.h"
#include "avfilter.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"w",
"h",
"cw",

@ -32,7 +32,7 @@
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"in_w", "iw", ///< width of the input video
"in_h", "ih", ///< height of the input video
"out_w", "ow", ///< width of the cropped video

@ -51,7 +51,7 @@
#include FT_FREETYPE_H
#include FT_GLYPH_H
static const char * const var_names[] = {
static const char *const var_names[] = {
"main_w", "w", "W", ///< width of the input video
"main_h", "h", "H", ///< height of the input video
"tw", "text_w", ///< width of the rendered text
@ -72,8 +72,8 @@ static const char * const var_names[] = {
NULL
};
static const char *fun2_names[] = {
"rand",
static const char *const fun2_names[] = {
"rand"
};
static double drand(void *opaque, double min, double max)

@ -30,7 +30,7 @@
#include "avfilter.h"
#include "internal.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"w", ///< width of the input video
"h", ///< height of the input video
"val", ///< input value for the pixel

@ -38,7 +38,7 @@
#include "internal.h"
#include "drawutils.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"main_w", "W", ///< width of the main video
"main_h", "H", ///< height of the main video
"overlay_w", "w", ///< width of the overlay video

@ -35,7 +35,7 @@
#include "libavutil/mathematics.h"
#include "drawutils.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"in_w", "iw",
"in_h", "ih",
"out_w", "ow",

@ -31,7 +31,7 @@
#include "libavutil/avassert.h"
#include "libswscale/swscale.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"in_w", "iw",
"in_h", "ih",
"out_w", "ow",

@ -27,7 +27,7 @@
#include "libavutil/fifo.h"
#include "avfilter.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"TB", ///< timebase
"pts", ///< original pts in the file of the frame

@ -30,7 +30,7 @@
#include "libavutil/mathematics.h"
#include "avfilter.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"INTERLACED", ///< tell if the current frame is interlaced
"N", ///< frame number (starting at zero)
"POS", ///< original position in the file of the frame

@ -30,7 +30,7 @@
#include "avfilter.h"
#include "internal.h"
static const char * const var_names[] = {
static const char *const var_names[] = {
"AVTB", /* default timebase 1/AV_TIME_BASE */
"intb", /* input timebase */
NULL

@ -53,7 +53,9 @@
#define GIF_CHUNKS 100
/* slows down the decoding (and some browsers don't like it) */
/* update on the 'some browsers don't like it issue from above: this was probably due to missing 'Data Sub-block Terminator' (byte 19) in the app_header */
/* update on the 'some browsers don't like it' issue from above:
* this was probably due to missing 'Data Sub-block Terminator'
* (byte 19) in the app_header */
#define GIF_ADD_APP_HEADER // required to enable looping of animated gif
typedef struct {
@ -65,8 +67,15 @@ typedef struct {
/* we use the standard 216 color palette */
/* this script was used to create the palette:
* for r in 00 33 66 99 cc ff; do for g in 00 33 66 99 cc ff; do echo -n " "; for b in 00 33 66 99 cc ff; do
* echo -n "{ 0x$r, 0x$g, 0x$b }, "; done; echo ""; done; done
* for r in 00 33 66 99 cc ff; do
* for g in 00 33 66 99 cc ff; do
* echo -n " "
* for b in 00 33 66 99 cc ff; do
* echo -n "{ 0x$r, 0x$g, 0x$b }, "
* done
* echo ""
* done
* done
*/
static const rgb_triplet gif_clut[216] = {
@ -109,9 +118,8 @@ static const rgb_triplet gif_clut[216] = {
};
/* GIF header */
static int gif_image_write_header(AVIOContext *pb,
int width, int height, int loop_count,
uint32_t *palette)
static int gif_image_write_header(AVIOContext *pb, int width, int height,
int loop_count, uint32_t *palette)
{
int i;
unsigned int v;
@ -127,44 +135,45 @@ static int gif_image_write_header(AVIOContext *pb,
/* the global palette */
if (!palette) {
avio_write(pb, (const unsigned char *)gif_clut, 216*3);
for(i=0;i<((256-216)*3);i++)
avio_write(pb, (const unsigned char *)gif_clut, 216 * 3);
for (i = 0; i < ((256 - 216) * 3); i++)
avio_w8(pb, 0);
} else {
for(i=0;i<256;i++) {
for (i = 0; i < 256; i++) {
v = palette[i];
avio_w8(pb, (v >> 16) & 0xff);
avio_w8(pb, (v >> 8) & 0xff);
avio_w8(pb, (v) & 0xff);
avio_w8(pb, (v >> 8) & 0xff);
avio_w8(pb, (v) & 0xff);
}
}
/* update: this is the 'NETSCAPE EXTENSION' that allows for looped animated gif
see http://members.aol.com/royalef/gifabout.htm#net-extension
byte 1 : 33 (hex 0x21) GIF Extension code
byte 2 : 255 (hex 0xFF) Application Extension Label
byte 3 : 11 (hex (0x0B) Length of Application Block
(eleven bytes of data to follow)
bytes 4 to 11 : "NETSCAPE"
bytes 12 to 14 : "2.0"
byte 15 : 3 (hex 0x03) Length of Data Sub-Block
(three bytes of data to follow)
byte 16 : 1 (hex 0x01)
bytes 17 to 18 : 0 to 65535, an unsigned integer in
lo-hi byte format. This indicate the
number of iterations the loop should
be executed.
bytes 19 : 0 (hex 0x00) a Data Sub-block Terminator
*/
/* update: this is the 'NETSCAPE EXTENSION' that allows for looped animated
* GIF, see http://members.aol.com/royalef/gifabout.htm#net-extension
*
* byte 1 : 33 (hex 0x21) GIF Extension code
* byte 2 : 255 (hex 0xFF) Application Extension Label
* byte 3 : 11 (hex 0x0B) Length of Application Block
* (eleven bytes of data to follow)
* bytes 4 to 11 : "NETSCAPE"
* bytes 12 to 14 : "2.0"
* byte 15 : 3 (hex 0x03) Length of Data Sub-Block
* (three bytes of data to follow)
* byte 16 : 1 (hex 0x01)
* bytes 17 to 18 : 0 to 65535, an unsigned integer in
* lo-hi byte format. This indicates the
* number of iterations the loop should
* be executed.
* byte 19 : 0 (hex 0x00) a Data Sub-block Terminator
*/
/* application extension header */
#ifdef GIF_ADD_APP_HEADER
if (loop_count >= 0 && loop_count <= 65535) {
avio_w8(pb, 0x21);
avio_w8(pb, 0xff);
avio_w8(pb, 0x0b);
avio_write(pb, "NETSCAPE2.0", sizeof("NETSCAPE2.0") - 1); // bytes 4 to 14
avio_w8(pb, 0x21);
avio_w8(pb, 0xff);
avio_w8(pb, 0x0b);
// bytes 4 to 14
avio_write(pb, "NETSCAPE2.0", sizeof("NETSCAPE2.0") - 1);
avio_w8(pb, 0x03); // byte 15
avio_w8(pb, 0x01); // byte 16
avio_wl16(pb, (uint16_t)loop_count);
@ -180,7 +189,6 @@ static inline unsigned char gif_clut_index(uint8_t r, uint8_t g, uint8_t b)
return (((r) / 47) % 6) * 6 * 6 + (((g) / 47) % 6) * 6 + (((b) / 47) % 6);
}
static int gif_image_write_image(AVIOContext *pb,
int x1, int y1, int width, int height,
const uint8_t *buf, int linesize, int pix_fmt)
@ -201,45 +209,44 @@ static int gif_image_write_image(AVIOContext *pb,
avio_w8(pb, 0x08);
left= width * height;
left = width * height;
init_put_bits(&p, buffer, 130);
/*
* the thing here is the bitstream is written as little packets, with a size byte before
* but it's still the same bitstream between packets (no flush !)
* the thing here is the bitstream is written as little packets, with a size
* byte before but it's still the same bitstream between packets (no flush !)
*/
ptr = buf;
w = width;
while(left>0) {
w = width;
while (left > 0) {
put_bits(&p, 9, 0x0100); /* clear code */
for(i=(left<GIF_CHUNKS)?left:GIF_CHUNKS;i;i--) {
for (i = (left < GIF_CHUNKS) ? left : GIF_CHUNKS; i; i--) {
if (pix_fmt == PIX_FMT_RGB24) {
v = gif_clut_index(ptr[0], ptr[1], ptr[2]);
ptr+=3;
v = gif_clut_index(ptr[0], ptr[1], ptr[2]);
ptr += 3;
} else {
v = *ptr++;
}
put_bits(&p, 9, v);
if (--w == 0) {
w = width;
w = width;
buf += linesize;
ptr = buf;
ptr = buf;
}
}
if(left<=GIF_CHUNKS) {
if (left <= GIF_CHUNKS) {
put_bits(&p, 9, 0x101); /* end of stream */
flush_put_bits(&p);
}
if(put_bits_ptr(&p) - p.buf > 0) {
if (put_bits_ptr(&p) - p.buf > 0) {
avio_w8(pb, put_bits_ptr(&p) - p.buf); /* byte count of the packet */
avio_write(pb, p.buf, put_bits_ptr(&p) - p.buf); /* the actual buffer */
p.buf_ptr = p.buf; /* dequeue the bytes off the bitstream */
}
left-=GIF_CHUNKS;
left -= GIF_CHUNKS;
}
avio_w8(pb, 0x00); /* end of image block */
@ -261,14 +268,14 @@ static int gif_write_header(AVFormatContext *s)
int i, width, height /*, rate*/;
/* XXX: do we reject audio streams or just ignore them ?
if(s->nb_streams > 1)
return -1;
*/
gif->time = 0;
* if (s->nb_streams > 1)
* return -1;
*/
gif->time = 0;
gif->file_time = 0;
video_enc = NULL;
for(i=0;i<s->nb_streams;i++) {
for (i = 0; i < s->nb_streams; i++) {
enc = s->streams[i]->codec;
if (enc->codec_type != AVMEDIA_TYPE_AUDIO)
video_enc = enc;
@ -278,13 +285,14 @@ static int gif_write_header(AVFormatContext *s)
av_free(gif);
return -1;
} else {
width = video_enc->width;
width = video_enc->width;
height = video_enc->height;
// rate = video_enc->time_base.den;
}
if (video_enc->pix_fmt != PIX_FMT_RGB24) {
av_log(s, AV_LOG_ERROR, "ERROR: gif only handles the rgb24 pixel format. Use -pix_fmt rgb24.\n");
av_log(s, AV_LOG_ERROR,
"ERROR: gif only handles the rgb24 pixel format. Use -pix_fmt rgb24.\n");
return AVERROR(EIO);
}
@ -294,8 +302,8 @@ static int gif_write_header(AVFormatContext *s)
return 0;
}
static int gif_write_video(AVFormatContext *s,
AVCodecContext *enc, const uint8_t *buf, int size)
static int gif_write_video(AVFormatContext *s, AVCodecContext *enc,
const uint8_t *buf, int size)
{
AVIOContext *pb = s->pb;
int jiffies;
@ -311,7 +319,7 @@ static int gif_write_video(AVFormatContext *s,
/* XXX: should use delay, in order to be more accurate */
/* instead of using the same rounded value each time */
/* XXX: don't even remember if I really use it for now */
jiffies = (70*enc->time_base.num/enc->time_base.den) - 1;
jiffies = (70 * enc->time_base.num / enc->time_base.den) - 1;
avio_wl16(pb, jiffies);
@ -346,7 +354,8 @@ static int gif_write_trailer(AVFormatContext *s)
#define OFFSET(x) offsetof(GIFContext, x)
#define ENC AV_OPT_FLAG_ENCODING_PARAM
static const AVOption options[] = {
{ "loop", "Number of times to loop the output.", OFFSET(loop), AV_OPT_TYPE_INT, {0}, 0, 65535, ENC },
{ "loop", "Number of times to loop the output.", OFFSET(loop),
AV_OPT_TYPE_INT, { 0 }, 0, 65535, ENC },
{ NULL },
};
@ -358,15 +367,15 @@ static const AVClass gif_muxer_class = {
};
AVOutputFormat ff_gif_muxer = {
.name = "gif",
.long_name = NULL_IF_CONFIG_SMALL("GIF Animation"),
.mime_type = "image/gif",
.extensions = "gif",
.priv_data_size = sizeof(GIFContext),
.audio_codec = CODEC_ID_NONE,
.video_codec = CODEC_ID_RAWVIDEO,
.write_header = gif_write_header,
.write_packet = gif_write_packet,
.write_trailer = gif_write_trailer,
.priv_class = &gif_muxer_class,
.name = "gif",
.long_name = NULL_IF_CONFIG_SMALL("GIF Animation"),
.mime_type = "image/gif",
.extensions = "gif",
.priv_data_size = sizeof(GIFContext),
.audio_codec = CODEC_ID_NONE,
.video_codec = CODEC_ID_RAWVIDEO,
.write_header = gif_write_header,
.write_packet = gif_write_packet,
.write_trailer = gif_write_trailer,
.priv_class = &gif_muxer_class,
};

@ -138,6 +138,11 @@ static enum CodecID av_str2id(const IdStrMap *tags, const char *str)
return CODEC_ID_NONE;
}
enum CodecID ff_guess_image2_codec(const char *filename)
{
return av_str2id(img_tags, filename);
}
/* return -1 if no image found */
static int find_image_range(int *pfirst_index, int *plast_index,
const char *path)
@ -194,7 +199,7 @@ static int find_image_range(int *pfirst_index, int *plast_index,
static int read_probe(AVProbeData *p)
{
if (p->filename && av_str2id(img_tags, p->filename)) {
if (p->filename && ff_guess_image2_codec(p->filename)) {
if (av_filename_number_test(p->filename))
return AVPROBE_SCORE_MAX;
else
@ -203,11 +208,6 @@ static int read_probe(AVProbeData *p)
return 0;
}
enum CodecID ff_guess_image2_codec(const char *filename)
{
return av_str2id(img_tags, filename);
}
static int read_header(AVFormatContext *s1)
{
VideoData *s = s1->priv_data;
@ -277,7 +277,7 @@ static int read_header(AVFormatContext *s1)
const char *str= strrchr(s->path, '.');
s->split_planes = str && !av_strcasecmp(str + 1, "y");
st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
st->codec->codec_id = av_str2id(img_tags, s->path);
st->codec->codec_id = ff_guess_image2_codec(s->path);
if (st->codec->codec_id == CODEC_ID_LJPEG)
st->codec->codec_id = CODEC_ID_MJPEG;
}
@ -419,7 +419,7 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
avio_close(pb[1]);
avio_close(pb[2]);
}else{
if(av_str2id(img_tags, s->filename) == CODEC_ID_JPEG2000){
if (ff_guess_image2_codec(s->filename) == CODEC_ID_JPEG2000) {
AVStream *st = s->streams[0];
if(st->codec->extradata_size > 8 &&
AV_RL32(st->codec->extradata+4) == MKTAG('j','p','2','h')){

@ -542,7 +542,7 @@ static EbmlSyntax matroska_clusters[] = {
{ 0 }
};
static const char *matroska_doctypes[] = { "matroska", "webm" };
static const char *const matroska_doctypes[] = { "matroska", "webm" };
static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
{

@ -2311,7 +2311,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
int64_t dts;
int data_offset = 0;
unsigned entries, first_sample_flags = frag->flags;
int flags, distance, i;
int flags, distance, i, found_keyframe = 0;
for (i = 0; i < c->fc->nb_streams; i++) {
if (c->fc->streams[i]->id == frag->track_id) {
@ -2365,7 +2365,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
unsigned sample_size = frag->size;
int sample_flags = i ? frag->flags : first_sample_flags;
unsigned sample_duration = frag->duration;
int keyframe;
int keyframe = 0;
if (flags & MOV_TRUN_SAMPLE_DURATION) sample_duration = avio_rb32(pb);
if (flags & MOV_TRUN_SAMPLE_SIZE) sample_size = avio_rb32(pb);
@ -2374,8 +2374,13 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
sc->ctts_data[sc->ctts_count].duration = (flags & MOV_TRUN_SAMPLE_CTS) ?
avio_rb32(pb) : 0;
sc->ctts_count++;
if ((keyframe = st->codec->codec_type == AVMEDIA_TYPE_AUDIO ||
(flags & MOV_TRUN_FIRST_SAMPLE_FLAGS && !i && !(sample_flags & ~MOV_FRAG_SAMPLE_FLAG_DEGRADATION_PRIORITY_MASK)) || sample_flags & MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO))
if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO)
keyframe = 1;
else if (!found_keyframe)
keyframe = found_keyframe =
!(sample_flags & (MOV_FRAG_SAMPLE_FLAG_IS_NON_SYNC |
MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES));
if (keyframe)
distance = 0;
av_add_index_entry(st, offset, dts, sample_size, distance,
keyframe ? AVINDEX_KEYFRAME : 0);

@ -65,14 +65,31 @@ static const struct {
{ "mono", 1, AV_CH_LAYOUT_MONO },
{ "stereo", 2, AV_CH_LAYOUT_STEREO },
{ "2.1", 3, AV_CH_LAYOUT_2POINT1 },
{ "3.0", 3, AV_CH_LAYOUT_SURROUND },
{ "3.0(back)", 3, AV_CH_LAYOUT_2_1 },
{ "4.0", 4, AV_CH_LAYOUT_4POINT0 },
{ "quad", 4, AV_CH_LAYOUT_QUAD },
{ "quad(side)", 4, AV_CH_LAYOUT_2_2 },
{ "3.1", 4, AV_CH_LAYOUT_3POINT1 },
{ "5.0", 5, AV_CH_LAYOUT_5POINT0_BACK },
{ "5.0(side)", 5, AV_CH_LAYOUT_5POINT0 },
{ "4.1", 5, AV_CH_LAYOUT_4POINT1 },
{ "5.1", 6, AV_CH_LAYOUT_5POINT1_BACK },
{ "5.1(side)", 6, AV_CH_LAYOUT_5POINT1 },
// { "5.1+downmix", 8, AV_CH_LAYOUT_5POINT1|AV_CH_LAYOUT_STEREO_DOWNMIX, },
{ "6.0", 6, AV_CH_LAYOUT_6POINT0 },
{ "6.0(front)", 6, AV_CH_LAYOUT_6POINT0_FRONT },
{ "hexagonal", 6, AV_CH_LAYOUT_HEXAGONAL },
{ "6.1", 7, AV_CH_LAYOUT_6POINT1 },
{ "6.1", 7, AV_CH_LAYOUT_6POINT1_BACK },
{ "6.1(front)", 7, AV_CH_LAYOUT_6POINT1_FRONT },
// { "6.1+downmix", 9, AV_CH_LAYOUT_6POINT1|AV_CH_LAYOUT_STEREO_DOWNMIX, },
{ "7.0", 7, AV_CH_LAYOUT_7POINT0 },
{ "7.0(front)", 7, AV_CH_LAYOUT_7POINT0_FRONT },
{ "7.1", 8, AV_CH_LAYOUT_7POINT1 },
{ "7.1(wide)", 8, AV_CH_LAYOUT_7POINT1_WIDE },
// { "7.1+downmix", 10, AV_CH_LAYOUT_7POINT1|AV_CH_LAYOUT_STEREO_DOWNMIX, },
{ "octagonal", 8, AV_CH_LAYOUT_OCTAGONAL },
{ "downmix", 2, AV_CH_LAYOUT_STEREO_DOWNMIX, },
};

@ -622,13 +622,13 @@ void av_free_expr(AVExpr *e)
#undef printf
#include <string.h>
static double const_values[] = {
static const double const_values[] = {
M_PI,
M_E,
0
};
static const char *const_names[] = {
static const char *const const_names[] = {
"PI",
"E",
0

@ -22,6 +22,7 @@
*/
#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
@ -30,8 +31,8 @@
#define vzero vec_splat_s32(0)
static inline void
altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
static inline void altivec_packIntArrayToCharArray(int *val, uint8_t *dest,
int dstW)
{
register int i;
vector unsigned int altivec_vectorShiftInt19 =
@ -41,106 +42,104 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
/* and will handle load misalignment on val w/ vec_perm */
vector unsigned char perm1;
vector signed int v1;
for (i = 0 ; (i < dstW) &&
(((uintptr_t)dest + i) % 16) ; i++) {
int t = val[i] >> 19;
dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
for (i = 0; (i < dstW) &&
(((uintptr_t)dest + i) % 16); i++) {
int t = val[i] >> 19;
dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
}
perm1 = vec_lvsl(i << 2, val);
v1 = vec_ld(i << 2, val);
for ( ; i < (dstW - 15); i+=16) {
v1 = vec_ld(i << 2, val);
for (; i < (dstW - 15); i += 16) {
int offset = i << 2;
vector signed int v2 = vec_ld(offset + 16, val);
vector signed int v3 = vec_ld(offset + 32, val);
vector signed int v4 = vec_ld(offset + 48, val);
vector signed int v5 = vec_ld(offset + 64, val);
vector signed int v2 = vec_ld(offset + 16, val);
vector signed int v3 = vec_ld(offset + 32, val);
vector signed int v4 = vec_ld(offset + 48, val);
vector signed int v5 = vec_ld(offset + 64, val);
vector signed int v12 = vec_perm(v1, v2, perm1);
vector signed int v23 = vec_perm(v2, v3, perm1);
vector signed int v34 = vec_perm(v3, v4, perm1);
vector signed int v45 = vec_perm(v4, v5, perm1);
vector signed int vA = vec_sra(v12, altivec_vectorShiftInt19);
vector signed int vB = vec_sra(v23, altivec_vectorShiftInt19);
vector signed int vC = vec_sra(v34, altivec_vectorShiftInt19);
vector signed int vD = vec_sra(v45, altivec_vectorShiftInt19);
vector signed int vA = vec_sra(v12, altivec_vectorShiftInt19);
vector signed int vB = vec_sra(v23, altivec_vectorShiftInt19);
vector signed int vC = vec_sra(v34, altivec_vectorShiftInt19);
vector signed int vD = vec_sra(v45, altivec_vectorShiftInt19);
vector unsigned short vs1 = vec_packsu(vA, vB);
vector unsigned short vs2 = vec_packsu(vC, vD);
vector unsigned char vf = vec_packsu(vs1, vs2);
vector unsigned char vf = vec_packsu(vs1, vs2);
vec_st(vf, i, dest);
v1 = v5;
}
} else { // dest is properly aligned, great
for (i = 0; i < (dstW - 15); i+=16) {
for (i = 0; i < (dstW - 15); i += 16) {
int offset = i << 2;
vector signed int v1 = vec_ld(offset, val);
vector signed int v2 = vec_ld(offset + 16, val);
vector signed int v3 = vec_ld(offset + 32, val);
vector signed int v4 = vec_ld(offset + 48, val);
vector signed int v5 = vec_sra(v1, altivec_vectorShiftInt19);
vector signed int v6 = vec_sra(v2, altivec_vectorShiftInt19);
vector signed int v7 = vec_sra(v3, altivec_vectorShiftInt19);
vector signed int v8 = vec_sra(v4, altivec_vectorShiftInt19);
vector signed int v1 = vec_ld(offset, val);
vector signed int v2 = vec_ld(offset + 16, val);
vector signed int v3 = vec_ld(offset + 32, val);
vector signed int v4 = vec_ld(offset + 48, val);
vector signed int v5 = vec_sra(v1, altivec_vectorShiftInt19);
vector signed int v6 = vec_sra(v2, altivec_vectorShiftInt19);
vector signed int v7 = vec_sra(v3, altivec_vectorShiftInt19);
vector signed int v8 = vec_sra(v4, altivec_vectorShiftInt19);
vector unsigned short vs1 = vec_packsu(v5, v6);
vector unsigned short vs2 = vec_packsu(v7, v8);
vector unsigned char vf = vec_packsu(vs1, vs2);
vector unsigned char vf = vec_packsu(vs1, vs2);
vec_st(vf, i, dest);
}
}
for ( ; i < dstW ; i++) {
for (; i < dstW; i++) {
int t = val[i] >> 19;
dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
}
}
//FIXME remove the usage of scratch buffers.
static void
yuv2planeX_altivec(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset)
// FIXME remove the usage of scratch buffers.
static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset)
{
register int i, j;
DECLARE_ALIGNED(16, int, val)[dstW];
DECLARE_ALIGNED(16, int, val)[dstW];
for (i=0; i<dstW; i++)
val[i] = dither[(i + offset) & 7] << 12;
for (i = 0; i < dstW; i++)
val[i] = dither[(i + offset) & 7] << 12;
for (j = 0; j < filterSize; j++) {
vector signed short l1, vLumFilter = vec_ld(j << 1, filter);
vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);
vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
for (j = 0; j < filterSize; j++) {
vector signed short l1, vLumFilter = vec_ld(j << 1, filter);
vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);
vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
perm = vec_lvsl(0, src[j]);
l1 = vec_ld(0, src[j]);
perm = vec_lvsl(0, src[j]);
l1 = vec_ld(0, src[j]);
for (i = 0; i < (dstW - 7); i+=8) {
int offset = i << 2;
vector signed short l2 = vec_ld((i << 1) + 16, src[j]);
for (i = 0; i < (dstW - 7); i += 8) {
int offset = i << 2;
vector signed short l2 = vec_ld((i << 1) + 16, src[j]);
vector signed int v1 = vec_ld(offset, val);
vector signed int v2 = vec_ld(offset + 16, val);
vector signed int v1 = vec_ld(offset, val);
vector signed int v2 = vec_ld(offset + 16, val);
vector signed short ls = vec_perm(l1, l2, perm); // lumSrc[j][i] ... lumSrc[j][i+7]
vector signed short ls = vec_perm(l1, l2, perm); // lumSrc[j][i] ... lumSrc[j][i+7]
vector signed int i1 = vec_mule(vLumFilter, ls);
vector signed int i2 = vec_mulo(vLumFilter, ls);
vector signed int i1 = vec_mule(vLumFilter, ls);
vector signed int i2 = vec_mulo(vLumFilter, ls);
vector signed int vf1 = vec_mergeh(i1, i2);
vector signed int vf2 = vec_mergel(i1, i2); // lumSrc[j][i] * lumFilter[j] ... lumSrc[j][i+7] * lumFilter[j]
vector signed int vf1 = vec_mergeh(i1, i2);
vector signed int vf2 = vec_mergel(i1, i2); // lumSrc[j][i] * lumFilter[j] ... lumSrc[j][i+7] * lumFilter[j]
vector signed int vo1 = vec_add(v1, vf1);
vector signed int vo2 = vec_add(v2, vf2);
vector signed int vo1 = vec_add(v1, vf1);
vector signed int vo2 = vec_add(v2, vf2);
vec_st(vo1, offset, val);
vec_st(vo2, offset + 16, val);
vec_st(vo1, offset, val);
vec_st(vo2, offset + 16, val);
l1 = l2;
}
for ( ; i < dstW; i++) {
val[i] += src[j][i] * filter[j];
}
l1 = l2;
}
altivec_packIntArrayToCharArray(val, dest, dstW);
for (; i < dstW; i++)
val[i] += src[j][i] * filter[j];
}
altivec_packIntArrayToCharArray(val, dest, dstW);
}
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
@ -151,167 +150,164 @@ static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
DECLARE_ALIGNED(16, int, tempo)[4];
if (filterSize % 4) {
for (i=0; i<dstW; i++) {
for (i = 0; i < dstW; i++) {
register int j;
register int srcPos = filterPos[i];
register int val = 0;
for (j=0; j<filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}
dst[i] = FFMIN(val>>7, (1<<15)-1);
}
}
else
switch (filterSize) {
case 4:
for (i=0; i<dstW; i++) {
register int srcPos = filterPos[i];
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char src_v1, src_vF;
vector signed short src_v, filter_v;
vector signed int val_vEven, val_s;
if ((((uintptr_t)src + srcPos) % 16) > 12) {
src_v1 = vec_ld(srcPos + 16, src);
}
src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
src_v = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
// now put our elements in the even slots
src_v = vec_mergeh(src_v, (vector signed short)vzero);
filter_v = vec_ld(i << 3, filter);
// The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
// The neat trick: We only care for half the elements,
// high or low depending on (i<<3)%16 (it's 0 or 8 here),
// and we're going to use vec_mule, so we choose
// carefully how to "unpack" the elements into the even slots.
if ((i << 3) % 16)
filter_v = vec_mergel(filter_v, (vector signed short)vzero);
else
filter_v = vec_mergeh(filter_v, (vector signed short)vzero);
val_vEven = vec_mule(src_v, filter_v);
val_s = vec_sums(val_vEven, vzero);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
break;
case 8:
for (i=0; i<dstW; i++) {
register int srcPos = filterPos[i];
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char src_v1, src_vF;
vector signed short src_v, filter_v;
vector signed int val_v, val_s;
if ((((uintptr_t)src + srcPos) % 16) > 8) {
src_v1 = vec_ld(srcPos + 16, src);
}
src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
src_v = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
filter_v = vec_ld(i << 4, filter);
// the 4 above is 3 (filterSize == 8) + 1 (sizeof(short) == 2)
val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
break;
case 16:
for (i=0; i<dstW; i++) {
register int srcPos = filterPos[i];
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char src_v1 = vec_ld(srcPos + 16, src);
vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
vector signed short src_vA = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
vector signed short src_vB = // vec_unpackh sign-extends...
(vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
vector signed short filter_v0 = vec_ld(i << 5, filter);
vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
// the 5 above are 4 (filterSize == 16) + 1 (sizeof(short) == 2)
vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
vector signed int val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
register int val = 0;
for (j = 0; j < filterSize; j++)
val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
}
break;
default:
for (i=0; i<dstW; i++) {
register int j;
register int srcPos = filterPos[i];
vector signed int val_s, val_v = (vector signed int)vzero;
vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter);
vector unsigned char permF = vec_lvsl((i * 2 * filterSize), filter);
} else
switch (filterSize) {
case 4:
for (i = 0; i < dstW; i++) {
register int srcPos = filterPos[i];
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char src_v1, src_vF;
vector signed short src_v, filter_v;
vector signed int val_vEven, val_s;
if ((((uintptr_t)src + srcPos) % 16) > 12) {
src_v1 = vec_ld(srcPos + 16, src);
}
src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
src_v = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
// now put our elements in the even slots
src_v = vec_mergeh(src_v, (vector signed short)vzero);
filter_v = vec_ld(i << 3, filter);
// The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
// The neat trick: We only care for half the elements,
// high or low depending on (i<<3)%16 (it's 0 or 8 here),
// and we're going to use vec_mule, so we choose
// carefully how to "unpack" the elements into the even slots.
if ((i << 3) % 16)
filter_v = vec_mergel(filter_v, (vector signed short)vzero);
else
filter_v = vec_mergeh(filter_v, (vector signed short)vzero);
val_vEven = vec_mule(src_v, filter_v);
val_s = vec_sums(val_vEven, vzero);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
}
break;
case 8:
for (i = 0; i < dstW; i++) {
register int srcPos = filterPos[i];
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char src_v1, src_vF;
vector signed short src_v, filter_v;
vector signed int val_v, val_s;
if ((((uintptr_t)src + srcPos) % 16) > 8) {
src_v1 = vec_ld(srcPos + 16, src);
}
src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
src_v = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
filter_v = vec_ld(i << 4, filter);
// the 4 above is 3 (filterSize == 8) + 1 (sizeof(short) == 2)
val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
}
break;
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char permS = vec_lvsl(srcPos, src);
case 16:
for (i = 0; i < dstW; i++) {
register int srcPos = filterPos[i];
for (j = 0 ; j < filterSize - 15; j += 16) {
vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char src_v1 = vec_ld(srcPos + 16, src);
vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));
vector signed short src_vA = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
vector signed short src_vB = // vec_unpackh sign-extends...
(vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
vector signed short src_vA = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
vector signed short src_vB = // vec_unpackh sign-extends...
(vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
vector signed short filter_v0 = vec_perm(filter_v0R, filter_v1R, permF);
vector signed short filter_v1 = vec_perm(filter_v1R, filter_v2R, permF);
vector signed short filter_v0 = vec_ld(i << 5, filter);
vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
// the 5 above are 4 (filterSize == 16) + 1 (sizeof(short) == 2)
vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
val_v = vec_msums(src_vB, filter_v1, val_acc);
vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
filter_v0R = filter_v2R;
src_v0 = src_v1;
}
vector signed int val_s = vec_sums(val_v, vzero);
if (j < filterSize-7) {
// loading src_v0 is useless, it's already done above
//vector unsigned char src_v0 = vec_ld(srcPos + j, src);
vector unsigned char src_v1, src_vF;
vector signed short src_v, filter_v1R, filter_v;
if ((((uintptr_t)src + srcPos) % 16) > 8) {
src_v1 = vec_ld(srcPos + j + 16, src);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
}
break;
default:
for (i = 0; i < dstW; i++) {
register int j;
register int srcPos = filterPos[i];
vector signed int val_s, val_v = (vector signed int)vzero;
vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter);
vector unsigned char permF = vec_lvsl((i * 2 * filterSize), filter);
vector unsigned char src_v0 = vec_ld(srcPos, src);
vector unsigned char permS = vec_lvsl(srcPos, src);
for (j = 0; j < filterSize - 15; j += 16) {
vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);
vector signed short src_vA = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
vector signed short src_vB = // vec_unpackh sign-extends...
(vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
vector signed short filter_v0 = vec_perm(filter_v0R, filter_v1R, permF);
vector signed short filter_v1 = vec_perm(filter_v1R, filter_v2R, permF);
vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
val_v = vec_msums(src_vB, filter_v1, val_acc);
filter_v0R = filter_v2R;
src_v0 = src_v1;
}
if (j < filterSize - 7) {
// loading src_v0 is useless, it's already done above
// vector unsigned char src_v0 = vec_ld(srcPos + j, src);
vector unsigned char src_v1, src_vF;
vector signed short src_v, filter_v1R, filter_v;
if ((((uintptr_t)src + srcPos) % 16) > 8) {
src_v1 = vec_ld(srcPos + j + 16, src);
}
src_vF = vec_perm(src_v0, src_v1, permS);
src_v = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
// loading filter_v0R is useless, it's already done above
// vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter);
filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
filter_v = vec_perm(filter_v0R, filter_v1R, permF);
val_v = vec_msums(src_v, filter_v, val_v);
}
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
}
src_vF = vec_perm(src_v0, src_v1, permS);
src_v = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
// loading filter_v0R is useless, it's already done above
//vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter);
filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
filter_v = vec_perm(filter_v0R, filter_v1R, permF);
val_v = vec_msums(src_v, filter_v, val_v);
}
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
}
void ff_sws_init_swScale_altivec(SwsContext *c)
@ -334,12 +330,24 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
* match what's found in the body of ff_yuv2packedX_altivec() */
if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) {
switch (c->dstFormat) {
case PIX_FMT_ABGR: c->yuv2packedX = ff_yuv2abgr_X_altivec; break;
case PIX_FMT_BGRA: c->yuv2packedX = ff_yuv2bgra_X_altivec; break;
case PIX_FMT_ARGB: c->yuv2packedX = ff_yuv2argb_X_altivec; break;
case PIX_FMT_RGBA: c->yuv2packedX = ff_yuv2rgba_X_altivec; break;
case PIX_FMT_BGR24: c->yuv2packedX = ff_yuv2bgr24_X_altivec; break;
case PIX_FMT_RGB24: c->yuv2packedX = ff_yuv2rgb24_X_altivec; break;
case PIX_FMT_ABGR:
c->yuv2packedX = ff_yuv2abgr_X_altivec;
break;
case PIX_FMT_BGRA:
c->yuv2packedX = ff_yuv2bgra_X_altivec;
break;
case PIX_FMT_ARGB:
c->yuv2packedX = ff_yuv2argb_X_altivec;
break;
case PIX_FMT_RGBA:
c->yuv2packedX = ff_yuv2rgba_X_altivec;
break;
case PIX_FMT_BGR24:
c->yuv2packedX = ff_yuv2bgr24_X_altivec;
break;
case PIX_FMT_RGB24:
c->yuv2packedX = ff_yuv2rgb24_X_altivec;
break;
}
}
}

@ -22,55 +22,57 @@
*/
#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/cpu.h"
static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[],
static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[],
int srcSliceH, uint8_t *dstParam[],
int dstStride_a[])
{
uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
const uint8_t *ysrc = src[0];
const uint8_t *usrc = src[1];
const uint8_t *vsrc = src[2];
const int width = c->srcW;
const int height = srcSliceH;
const int lumStride = srcStride[0];
uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
// srcStride[0], srcStride[1], dstStride[0]);
const uint8_t *ysrc = src[0];
const uint8_t *usrc = src[1];
const uint8_t *vsrc = src[2];
const int width = c->srcW;
const int height = srcSliceH;
const int lumStride = srcStride[0];
const int chromStride = srcStride[1];
const int dstStride = dstStride_a[0];
const int dstStride = dstStride_a[0];
const vector unsigned char yperm = vec_lvsl(0, ysrc);
const int vertLumPerChroma = 2;
const int vertLumPerChroma = 2;
register unsigned int y;
/* This code assumes:
*
* 1) dst is 16 bytes-aligned
* 2) dstStride is a multiple of 16
* 3) width is a multiple of 16
* 4) lum & chrom stride are multiples of 8
*/
1) dst is 16 bytes-aligned
2) dstStride is a multiple of 16
3) width is a multiple of 16
4) lum & chrom stride are multiples of 8
*/
for (y=0; y<height; y++) {
for (y = 0; y < height; y++) {
int i;
for (i = 0; i < width - 31; i+= 32) {
const unsigned int j = i >> 1;
vector unsigned char v_yA = vec_ld(i, ysrc);
vector unsigned char v_yB = vec_ld(i + 16, ysrc);
vector unsigned char v_yC = vec_ld(i + 32, ysrc);
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
vector unsigned char v_uA = vec_ld(j, usrc);
vector unsigned char v_uB = vec_ld(j + 16, usrc);
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
vector unsigned char v_vA = vec_ld(j, vsrc);
vector unsigned char v_vB = vec_ld(j + 16, vsrc);
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
for (i = 0; i < width - 31; i += 32) {
const unsigned int j = i >> 1;
vector unsigned char v_yA = vec_ld(i, ysrc);
vector unsigned char v_yB = vec_ld(i + 16, ysrc);
vector unsigned char v_yC = vec_ld(i + 32, ysrc);
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
vector unsigned char v_uA = vec_ld(j, usrc);
vector unsigned char v_uB = vec_ld(j + 16, usrc);
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
vector unsigned char v_vA = vec_ld(j, vsrc);
vector unsigned char v_vB = vec_ld(j + 16, vsrc);
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
@ -81,71 +83,72 @@ static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[],
vec_st(v_yuy2_3, (i << 1) + 48, dst);
}
if (i < width) {
const unsigned int j = i >> 1;
vector unsigned char v_y1 = vec_ld(i, ysrc);
vector unsigned char v_u = vec_ld(j, usrc);
vector unsigned char v_v = vec_ld(j, vsrc);
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
const unsigned int j = i >> 1;
vector unsigned char v_y1 = vec_ld(i, ysrc);
vector unsigned char v_u = vec_ld(j, usrc);
vector unsigned char v_v = vec_ld(j, vsrc);
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
vec_st(v_yuy2_0, (i << 1), dst);
vec_st(v_yuy2_1, (i << 1) + 16, dst);
}
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
usrc += chromStride;
vsrc += chromStride;
}
ysrc += lumStride;
dst += dstStride;
dst += dstStride;
}
return srcSliceH;
}
static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[],
static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[],
int srcSliceH, uint8_t *dstParam[],
int dstStride_a[])
{
uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
const uint8_t *ysrc = src[0];
const uint8_t *usrc = src[1];
const uint8_t *vsrc = src[2];
const int width = c->srcW;
const int height = srcSliceH;
const int lumStride = srcStride[0];
const int chromStride = srcStride[1];
const int dstStride = dstStride_a[0];
const int vertLumPerChroma = 2;
uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
// srcStride[0], srcStride[1], dstStride[0]);
const uint8_t *ysrc = src[0];
const uint8_t *usrc = src[1];
const uint8_t *vsrc = src[2];
const int width = c->srcW;
const int height = srcSliceH;
const int lumStride = srcStride[0];
const int chromStride = srcStride[1];
const int dstStride = dstStride_a[0];
const int vertLumPerChroma = 2;
const vector unsigned char yperm = vec_lvsl(0, ysrc);
register unsigned int y;
/* This code assumes:
*
* 1) dst is 16 bytes-aligned
* 2) dstStride is a multiple of 16
* 3) width is a multiple of 16
* 4) lum & chrom stride are multiples of 8
*/
1) dst is 16 bytes-aligned
2) dstStride is a multiple of 16
3) width is a multiple of 16
4) lum & chrom stride are multiples of 8
*/
for (y=0; y<height; y++) {
for (y = 0; y < height; y++) {
int i;
for (i = 0; i < width - 31; i+= 32) {
const unsigned int j = i >> 1;
vector unsigned char v_yA = vec_ld(i, ysrc);
vector unsigned char v_yB = vec_ld(i + 16, ysrc);
vector unsigned char v_yC = vec_ld(i + 32, ysrc);
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
vector unsigned char v_uA = vec_ld(j, usrc);
vector unsigned char v_uB = vec_ld(j + 16, usrc);
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
vector unsigned char v_vA = vec_ld(j, vsrc);
vector unsigned char v_vB = vec_ld(j + 16, vsrc);
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
for (i = 0; i < width - 31; i += 32) {
const unsigned int j = i >> 1;
vector unsigned char v_yA = vec_ld(i, ysrc);
vector unsigned char v_yB = vec_ld(i + 16, ysrc);
vector unsigned char v_yC = vec_ld(i + 32, ysrc);
vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
vector unsigned char v_uA = vec_ld(j, usrc);
vector unsigned char v_uB = vec_ld(j + 16, usrc);
vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
vector unsigned char v_vA = vec_ld(j, vsrc);
vector unsigned char v_vB = vec_ld(j + 16, vsrc);
vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
@ -156,22 +159,22 @@ static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[],
vec_st(v_uyvy_3, (i << 1) + 48, dst);
}
if (i < width) {
const unsigned int j = i >> 1;
vector unsigned char v_y1 = vec_ld(i, ysrc);
vector unsigned char v_u = vec_ld(j, usrc);
vector unsigned char v_v = vec_ld(j, vsrc);
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
const unsigned int j = i >> 1;
vector unsigned char v_y1 = vec_ld(i, ysrc);
vector unsigned char v_u = vec_ld(j, usrc);
vector unsigned char v_v = vec_ld(j, vsrc);
vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
vec_st(v_uyvy_0, (i << 1), dst);
vec_st(v_uyvy_1, (i << 1) + 16, dst);
}
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
usrc += chromStride;
vsrc += chromStride;
}
ysrc += lumStride;
dst += dstStride;
dst += dstStride;
}
return srcSliceH;
}
@ -184,8 +187,8 @@ void ff_swscale_get_unscaled_altivec(SwsContext *c)
// unscaled YV12 -> packed YUV, we want speed
if (dstFormat == PIX_FMT_YUYV422)
c->swScale= yv12toyuy2_unscaled_altivec;
c->swScale = yv12toyuy2_unscaled_altivec;
else if (dstFormat == PIX_FMT_UYVY422)
c->swScale= yv12touyvy_unscaled_altivec;
c->swScale = yv12touyvy_unscaled_altivec;
}
}

@ -283,6 +283,11 @@ do_video_encoding prores.mov "-vcodec prores"
do_video_decoding "" "-pix_fmt yuv420p"
fi
if [ -n "$do_prores_kostya" ] ; then
do_video_encoding prores_kostya.mov "-vcodec prores_kostya -profile hq"
do_video_decoding "" "-pix_fmt yuv420p"
fi
if [ -n "$do_svq1" ] ; then
do_video_encoding svq1.mov "-an -vcodec svq1 -qscale 3 -pix_fmt yuv410p"
do_video_decoding "" "-pix_fmt yuv420p"

@ -0,0 +1,4 @@
40e7637e04991dbe9a23fe109f95bfc8 *./tests/data/vsynth1/prores_kostya.mov
3858901 ./tests/data/vsynth1/prores_kostya.mov
0a4153637d0cc0a88a8bcbf04cfaf8c6 *./tests/data/prores_kostya.vsynth1.out.yuv
stddev: 3.17 PSNR: 38.09 MAXDIFF: 39 bytes: 7603200/ 7603200

@ -0,0 +1,4 @@
ed8b8a94da049518af8f95c5da736e57 *./tests/data/vsynth2/prores_kostya.mov
3884586 ./tests/data/vsynth2/prores_kostya.mov
ca2f6c1162635dedfa468c90f1fdc0ef *./tests/data/prores_kostya.vsynth2.out.yuv
stddev: 0.92 PSNR: 48.77 MAXDIFF: 10 bytes: 7603200/ 7603200
Loading…
Cancel
Save