Merge remote-tracking branch 'qatar/master'

* qatar/master: (46 commits) mtv: Make sure audio_subsegments is not 0 v4l2: use V4L2_FMT_FLAG_EMULATED only if it is defined avconv: add symbolic names for -vsync parameters flvdec: Fix compiler warning for uninitialized variables rtsp: Fix compiler warning for uninitialized variable ulti: convert to new bytestream API. swscale: Use standard multiple inclusion guards in ppc/ header files. Place some START_TIMER invocations in separate blocks. v4l2: list available formats v4l2: set the proper codec_tag v4l2: refactor device_open v4l2: simplify away io_method v4l2: cosmetics v4l2: uniform and format options v4l2: do not force interlaced mode avio: exit early in fill_buffer without read_packet vc1dec: fix invalid memory access for small video dimensions rv34: fix invalid memory access for small video dimensions rv34: joint coefficient decoding and dequantization avplay: Don't call avio_set_interrupt_cb(NULL) ... Conflicts: Changelog avconv.c doc/APIchanges doc/indevs.texi libavcodec/adxenc.c libavcodec/dnxhdenc.c libavcodec/h264.c libavdevice/v4l2.c libavformat/flvdec.c libavformat/mtv.c libswscale/utils.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
13 years ago · 7f83db3124
parent c4eec85a1f feb15cee5e
commit 7f83db3124
40 changed files with 964 additions and 942 deletions
--- a/2
+++ b/2
@ -145,7 +145,7 @@ easier to use. The changes are:
 - pan audio filter
 - IFF Amiga Continuous Bitmap (ACBM) decoder
 - ass filter
- CRI ADX audio format demuxer
+- CRI ADX audio format muxer and demuxer
 - Playstation Portable PMP format demuxer
 - Microsoft Windows ICO demuxer
 - life source
--- a/avconv.c
+++ b/avconv.c
@ -88,6 +88,11 @@

 #include "libavutil/avassert.h"

+#define VSYNC_AUTO       -1
+#define VSYNC_PASSTHROUGH 0
+#define VSYNC_CFR         1
+#define VSYNC_VFR         2
+
 const char program_name[] = "avconv";
 const int program_birth_year = 2000;

@ -127,7 +132,7 @@ static int do_hex_dump = 0;
 static int do_pkt_dump = 0;
 static int do_pass = 0;
 static const char *pass_logfilename_prefix;
-static int video_sync_method = -1;
+static int video_sync_method = VSYNC_AUTO;
 static int audio_sync_method = 0;
 static float audio_drift_threshold = 0.1;
 static int copy_ts = 0;
@ -1390,16 +1395,16 @@ static void do_video_out(AVFormatContext *s,
    *frame_size = 0;

    format_video_sync = video_sync_method;
-    if (format_video_sync < 0)
-        format_video_sync = (s->oformat->flags & AVFMT_NOTIMESTAMPS) ? 0 :
-                            (s->oformat->flags & AVFMT_VARIABLE_FPS) ? 2 : 1;
+    if (format_video_sync == VSYNC_AUTO)
+        format_video_sync = (s->oformat->flags & AVFMT_NOTIMESTAMPS) ? VSYNC_PASSTHROUGH :
+                            (s->oformat->flags & AVFMT_VARIABLE_FPS) ? VSYNC_VFR : VSYNC_CFR;

-    if (format_video_sync) {
+    if (format_video_sync != VSYNC_PASSTHROUGH) {
        double vdelta = sync_ipts - ost->sync_opts;
        // FIXME set to 0.5 after we fix some dts/pts bugs like in avidec.c
        if (vdelta < -1.1)
            nb_frames = 0;
-        else if (format_video_sync == 2) {
+        else if (format_video_sync == VSYNC_VFR) {
            if (vdelta <= -0.6) {
                nb_frames = 0;
            } else if (vdelta > 0.6)
@ -4426,6 +4431,17 @@ static int opt_video_filters(OptionsContext *o, const char *opt, const char *arg
    return parse_option(o, "filter:v", arg, options);
 }

+static int opt_vsync(const char *opt, const char *arg)
+{
+    if      (!av_strcasecmp(arg, "cfr"))         video_sync_method = VSYNC_CFR;
+    else if (!av_strcasecmp(arg, "vfr"))         video_sync_method = VSYNC_VFR;
+    else if (!av_strcasecmp(arg, "passthrough")) video_sync_method = VSYNC_PASSTHROUGH;
+
+    if (video_sync_method == VSYNC_AUTO)
+        video_sync_method = parse_number_or_die("vsync", arg, OPT_INT, VSYNC_AUTO, VSYNC_VFR);
+    return 0;
+}
+
 #define OFFSET(x) offsetof(OptionsContext, x)
 static const OptionDef options[] = {
    /* main options */
@ -4457,7 +4473,7 @@ static const OptionDef options[] = {
      "when dumping packets, also dump the payload" },
    { "re", OPT_BOOL | OPT_EXPERT | OPT_OFFSET, {.off = OFFSET(rate_emu)}, "read input at native frame rate", "" },
    { "target", HAS_ARG | OPT_FUNC2, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\", \"dvd\", \"dv\", \"dv50\", \"pal-vcd\", \"ntsc-svcd\", ...)", "type" },
-    { "vsync", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_sync_method}, "video sync method", "" },
+    { "vsync", HAS_ARG | OPT_EXPERT, {(void*)opt_vsync}, "video sync method", "" },
    { "async", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&audio_sync_method}, "audio sync method", "" },
    { "adrift_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT, {(void*)&audio_drift_threshold}, "audio drift threshold", "threshold" },
    { "copyts", OPT_BOOL | OPT_EXPERT, {(void*)&copy_ts}, "copy timestamps" },
--- a/3
+++ b/3
@ -1226,6 +1226,7 @@ HAVE_LIST="
    struct_sockaddr_in6
    struct_sockaddr_sa_len
    struct_sockaddr_storage
+    struct_v4l2_frmivalenum_discrete
    symver
    symver_asm_label
    symver_gnu_asm
@ -3174,6 +3175,8 @@ makeinfo --version > /dev/null 2>&1 && enable makeinfo  || disable makeinfo
 check_header linux/fb.h
 check_header linux/videodev.h
 check_header linux/videodev2.h
+check_struct linux/videodev2.h "struct v4l2_frmivalenum" discrete
+
 check_header sys/videoio.h

 check_func_headers "windows.h vfw.h" capCreateCaptureWindow "$vfwcap_indev_extralibs"
--- a/doc/APIchanges
+++ b/doc/APIchanges
@ -174,6 +174,10 @@ API changes, most recent first:
 2011-08-14 - 323b930 - lavu 51.12.0
  Add av_fifo_peek2(), deprecate av_fifo_peek().

+2011-08-26 - lavu 51.9.0
+  - add41de..abc78a5 Do not include intfloat_readwrite.h,
+    mathematics.h, rational.h, pixfmt.h, or log.h from avutil.h.
+
 2011-08-16 - 48f9e45 - lavf 53.8.0
  Add avformat_query_codec().

--- a/doc/avconv.texi
+++ b/doc/avconv.texi
@ -749,15 +749,15 @@ Thread count.
 Video sync method.

@table @option
-@item 0
+@item passthrough
 Each frame is passed with its timestamp from the demuxer to the muxer.
-@item 1
+@item cfr
 Frames will be duplicated and dropped to achieve exactly the requested
 constant framerate.
-@item 2
+@item vfr
 Frames are passed through with their timestamp or dropped so as to
 prevent 2 frames from having the same timestamp.
-@item -1
+@item auto
 Chooses between cfr (1) and vfr (2) depending on muxer capabilities. This is the
 default method.
@end table
--- a/doc/general.texi
+++ b/doc/general.texi
@ -134,7 +134,7 @@ library:
@item Brute Force & Ignorance   @tab   @tab X
    @tab Used in the game Flash Traffic: City of Angels.
@item BWF                       @tab X @tab X
-@item CRI ADX                   @tab   @tab X
+@item CRI ADX                   @tab X @tab X
    @tab Audio-only format used in console video games.
@item Discworld II BMV          @tab   @tab X
@item Interplay C93             @tab   @tab X
--- a/doc/indevs.texi
+++ b/doc/indevs.texi
@ -515,9 +515,9 @@ kind @file{/dev/video@var{N}}, where @var{N} is a number associated to
 the device.

 Video4Linux and Video4Linux2 devices only support a limited set of
-@var{width}x@var{height} sizes and frame rates. You can check which are
+@var{width}x@var{height} sizes and framerates. You can check which are
 supported for example with the command @command{dov4l} for Video4Linux
-devices and the command @command{v4l-info} for Video4Linux2 devices.
+devices and using @command{-list_formats all} for Video4Linux2 devices.

 If the size for the device is set to 0x0, the input device will
 try to auto-detect the size to use.
--- a/ffmpeg.c
+++ b/ffmpeg.c
@ -93,6 +93,11 @@

 #include "libavutil/avassert.h"

+#define VSYNC_AUTO       -1
+#define VSYNC_PASSTHROUGH 0
+#define VSYNC_CFR         1
+#define VSYNC_VFR         2
+
 const char program_name[] = "ffmpeg";
 const int program_birth_year = 2000;

@ -144,7 +149,7 @@ static int do_pkt_dump = 0;
 static int do_psnr = 0;
 static int do_pass = 0;
 static const char *pass_logfilename_prefix;
-static int video_sync_method = -1;
+static int video_sync_method = VSYNC_AUTO;
 static int audio_sync_method = 0;
 static float audio_drift_threshold = 0.1;
 static int copy_ts = 0;
@ -1433,15 +1438,15 @@ static void do_video_out(AVFormatContext *s,
    *frame_size = 0;

    format_video_sync = video_sync_method;
-    if (format_video_sync < 0)
-        format_video_sync = (s->oformat->flags & AVFMT_VARIABLE_FPS) ? ((s->oformat->flags & AVFMT_NOTIMESTAMPS) ? 0 : 2) : 1;
+    if (format_video_sync == VSYNC_AUTO)
+        format_video_sync = (s->oformat->flags & AVFMT_VARIABLE_FPS) ? ((s->oformat->flags & AVFMT_NOTIMESTAMPS) ? VSYNC_PASSTHROUGH : VSYNC_VFR) : 1;

-    if (format_video_sync) {
+    if (format_video_sync != VSYNC_PASSTHROUGH) {
        double vdelta = sync_ipts - ost->sync_opts + duration;
        // FIXME set to 0.5 after we fix some dts/pts bugs like in avidec.c
        if (vdelta < -1.1)
            nb_frames = 0;
-        else if (format_video_sync == 2) {
+        else if (format_video_sync == VSYNC_VFR) {
            if (vdelta <= -0.6) {
                nb_frames = 0;
            } else if (vdelta > 0.6)
@ -4873,6 +4878,17 @@ static int opt_video_filters(OptionsContext *o, const char *opt, const char *arg
    return parse_option(o, "filter:v", arg, options);
 }

+static int opt_vsync(const char *opt, const char *arg)
+{
+    if      (!av_strcasecmp(arg, "cfr"))         video_sync_method = VSYNC_CFR;
+    else if (!av_strcasecmp(arg, "vfr"))         video_sync_method = VSYNC_VFR;
+    else if (!av_strcasecmp(arg, "passthrough")) video_sync_method = VSYNC_PASSTHROUGH;
+
+    if (video_sync_method == VSYNC_AUTO)
+        video_sync_method = parse_number_or_die("vsync", arg, OPT_INT, VSYNC_AUTO, VSYNC_VFR);
+    return 0;
+}
+
 #define OFFSET(x) offsetof(OptionsContext, x)
 static const OptionDef options[] = {
    /* main options */
@ -4908,7 +4924,7 @@ static const OptionDef options[] = {
    { "loop_input", OPT_BOOL | OPT_EXPERT, {(void*)&loop_input}, "deprecated, use -loop" },
    { "loop_output", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&loop_output}, "deprecated, use -loop", "" },
    { "target", HAS_ARG | OPT_FUNC2, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\", \"dvd\", \"dv\", \"dv50\", \"pal-vcd\", \"ntsc-svcd\", ...)", "type" },
-    { "vsync", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_sync_method}, "video sync method", "" },
+    { "vsync", HAS_ARG | OPT_EXPERT, {(void*)opt_vsync}, "video sync method", "" },
    { "async", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&audio_sync_method}, "audio sync method", "" },
    { "adrift_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT, {(void*)&audio_drift_threshold}, "audio drift threshold", "threshold" },
    { "copyts", OPT_BOOL | OPT_EXPERT, {(void*)&copy_ts}, "copy timestamps" },
--- a/libavcodec/adxdec.c
+++ b/libavcodec/adxdec.c
@ -165,6 +165,13 @@ static int adx_decode_frame(AVCodecContext *avctx, void *data,
    return buf - avpkt->data;
 }

+static void adx_decode_flush(AVCodecContext *avctx)
+{
+    ADXContext *c = avctx->priv_data;
+    memset(c->prev, 0, sizeof(c->prev));
+    c->eof = 0;
+}
+
 AVCodec ff_adpcm_adx_decoder = {
    .name           = "adpcm_adx",
    .type           = AVMEDIA_TYPE_AUDIO,
@ -172,6 +179,7 @@ AVCodec ff_adpcm_adx_decoder = {
    .priv_data_size = sizeof(ADXContext),
    .init           = adx_decode_init,
    .decode         = adx_decode_frame,
+    .flush          = adx_decode_flush,
    .capabilities   = CODEC_CAP_DR1,
    .long_name      = NULL_IF_CONFIG_SMALL("SEGA CRI ADX ADPCM"),
 };
--- a/libavcodec/adxenc.c
+++ b/libavcodec/adxenc.c
@ -19,9 +19,9 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

-#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "adx.h"
+#include "bytestream.h"
 #include "put_bits.h"

 /**
@ -33,167 +33,135 @@
 * adx2wav & wav2adx http://www.geocities.co.jp/Playtown/2004/
 */

-/* 18 bytes <-> 32 samples */
-
-static void adx_encode(ADXContext *c, unsigned char *adx, const short *wav,
-                       ADXChannelState *prev)
+static void adx_encode(ADXContext *c, uint8_t *adx, const int16_t *wav,
+                       ADXChannelState *prev, int channels)
 {
    PutBitContext pb;
    int scale;
-    int i;
-    int s0,s1,s2,d;
-    int max=0;
-    int min=0;
-    int data[32];
+    int i, j;
+    int s0, s1, s2, d;
+    int max = 0;
+    int min = 0;
+    int data[BLOCK_SAMPLES];

    s1 = prev->s1;
    s2 = prev->s2;
-    for(i=0;i<32;i++) {
+    for (i = 0, j = 0; j < 32; i += channels, j++) {
        s0 = wav[i];
        d = ((s0 << COEFF_BITS) - c->coeff[0] * s1 - c->coeff[1] * s2) >> COEFF_BITS;
-        data[i]=d;
-        if (max<d) max=d;
-        if (min>d) min=d;
+        data[j] = d;
+        if (max < d)
+            max = d;
+        if (min > d)
+            min = d;
        s2 = s1;
        s1 = s0;
    }
    prev->s1 = s1;
    prev->s2 = s2;

-    /* -8..+7 */
-
-    if (max==0 && min==0) {
-        memset(adx,0,18);
+    if (max == 0 && min == 0) {
+        memset(adx, 0, BLOCK_SIZE);
        return;
    }

-    if (max/7>-min/8) scale = max/7;
-    else scale = -min/8;
+    if (max / 7 > -min / 8)
+        scale = max / 7;
+    else
+        scale = -min / 8;

-    if (scale==0) scale=1;
+    if (scale == 0)
+        scale = 1;

    AV_WB16(adx, scale);

    init_put_bits(&pb, adx + 2, 16);
-    for (i = 0; i < 32; i++)
-        put_sbits(&pb, 4, av_clip(data[i]/scale, -8, 7));
+    for (i = 0; i < BLOCK_SAMPLES; i++)
+        put_sbits(&pb, 4, av_clip(data[i] / scale, -8, 7));
    flush_put_bits(&pb);
 }

-static int adx_encode_header(AVCodecContext *avctx,unsigned char *buf,size_t bufsize)
+#define HEADER_SIZE 36
+
+static int adx_encode_header(AVCodecContext *avctx, uint8_t *buf, int bufsize)
 {
-#if 0
-    struct {
-        uint32_t offset; /* 0x80000000 + sample start - 4 */
-        unsigned char unknown1[3]; /* 03 12 04 */
-        unsigned char channel; /* 1 or 2 */
-        uint32_t freq;
-        uint32_t size;
-        uint32_t unknown2; /* 01 f4 03 00 */
-        uint32_t unknown3; /* 00 00 00 00 */
-        uint32_t unknown4; /* 00 00 00 00 */
-
-    /* if loop
-        unknown3 00 15 00 01
-        unknown4 00 00 00 01
-        long loop_start_sample;
-        long loop_start_byte;
-        long loop_end_sample;
-        long loop_end_byte;
-        long
-    */
-    } adxhdr; /* big endian */
-    /* offset-6 "(c)CRI" */
-#endif
    ADXContext *c = avctx->priv_data;

-    AV_WB32(buf+0x00,0x80000000|0x20);
-    AV_WB32(buf+0x04,0x03120400|avctx->channels);
-    AV_WB32(buf+0x08,avctx->sample_rate);
-    AV_WB32(buf+0x0c,0); /* FIXME: set after */
-    AV_WB16(buf + 0x10, c->cutoff);
-    AV_WB32(buf + 0x12, 0x03000000);
-    AV_WB32(buf + 0x16, 0x00000000);
-    AV_WB32(buf + 0x1a, 0x00000000);
-    memcpy (buf + 0x1e, "(c)CRI", 6);
-    return 0x20+4;
+    if (bufsize < HEADER_SIZE)
+        return AVERROR(EINVAL);
+
+    bytestream_put_be16(&buf, 0x8000);              /* header signature */
+    bytestream_put_be16(&buf, HEADER_SIZE - 4);     /* copyright offset */
+    bytestream_put_byte(&buf, 3);                   /* encoding */
+    bytestream_put_byte(&buf, BLOCK_SIZE);          /* block size */
+    bytestream_put_byte(&buf, 4);                   /* sample size */
+    bytestream_put_byte(&buf, avctx->channels);     /* channels */
+    bytestream_put_be32(&buf, avctx->sample_rate);  /* sample rate */
+    bytestream_put_be32(&buf, 0);                   /* total sample count */
+    bytestream_put_be16(&buf, c->cutoff);           /* cutoff frequency */
+    bytestream_put_byte(&buf, 3);                   /* version */
+    bytestream_put_byte(&buf, 0);                   /* flags */
+    bytestream_put_be32(&buf, 0);                   /* unknown */
+    bytestream_put_be32(&buf, 0);                   /* loop enabled */
+    bytestream_put_be16(&buf, 0);                   /* padding */
+    bytestream_put_buffer(&buf, "(c)CRI", 6);       /* copyright signature */
+
+    return HEADER_SIZE;
 }

 static av_cold int adx_encode_init(AVCodecContext *avctx)
 {
    ADXContext *c = avctx->priv_data;

-    if (avctx->channels > 2)
-        return -1; /* only stereo or mono =) */
-    avctx->frame_size = 32;
-
-    avctx->coded_frame= avcodec_alloc_frame();
-    avctx->coded_frame->key_frame= 1;
+    if (avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n");
+        return AVERROR(EINVAL);
+    }
+    avctx->frame_size = BLOCK_SAMPLES;

-//    avctx->bit_rate = avctx->sample_rate*avctx->channels*18*8/32;
+    avctx->coded_frame = avcodec_alloc_frame();

    /* the cutoff can be adjusted, but this seems to work pretty well */
    c->cutoff = 500;
    ff_adx_calculate_coeffs(c->cutoff, avctx->sample_rate, COEFF_BITS, c->coeff);

-    av_log(avctx, AV_LOG_DEBUG, "adx encode init\n");
-
    return 0;
 }

 static av_cold int adx_encode_close(AVCodecContext *avctx)
 {
    av_freep(&avctx->coded_frame);
-
    return 0;
 }

-static int adx_encode_frame(AVCodecContext *avctx,
-                uint8_t *frame, int buf_size, void *data)
+static int adx_encode_frame(AVCodecContext *avctx, uint8_t *frame,
+                            int buf_size, void *data)
 {
-    ADXContext *c = avctx->priv_data;
-    const short *samples = data;
-    unsigned char *dst = frame;
-    int rest = avctx->frame_size;
-
-/*
-    input data size =
-    ffmpeg.c: do_audio_out()
-    frame_bytes = enc->frame_size * 2 * enc->channels;
-*/
+    ADXContext *c          = avctx->priv_data;
+    const int16_t *samples = data;
+    uint8_t *dst           = frame;
+    int ch;

-//    printf("sz=%d ",buf_size); fflush(stdout);
    if (!c->header_parsed) {
-        int hdrsize = adx_encode_header(avctx,dst,buf_size);
-        dst+=hdrsize;
+        int hdrsize;
+        if ((hdrsize = adx_encode_header(avctx, dst, buf_size)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
+            return AVERROR(EINVAL);
+        }
+        dst      += hdrsize;
+        buf_size -= hdrsize;
        c->header_parsed = 1;
    }
+    if (buf_size < BLOCK_SIZE * avctx->channels) {
+        av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
+        return AVERROR(EINVAL);
+    }

-    if (avctx->channels==1) {
-        while(rest>=32) {
-            adx_encode(c, dst, samples, c->prev);
-            dst+=18;
-            samples+=32;
-            rest-=32;
-        }
-    } else {
-        while(rest>=32*2) {
-            short tmpbuf[32*2];
-            int i;
-
-            for(i=0;i<32;i++) {
-                tmpbuf[i] = samples[i*2];
-                tmpbuf[i+32] = samples[i*2+1];
-            }
-
-            adx_encode(c, dst,      tmpbuf,      c->prev);
-            adx_encode(c, dst + 18, tmpbuf + 32, c->prev + 1);
-            dst+=18*2;
-            samples+=32*2;
-            rest-=32*2;
-        }
+    for (ch = 0; ch < avctx->channels; ch++) {
+        adx_encode(c, dst, samples + ch, &c->prev[ch], avctx->channels);
+        dst += BLOCK_SIZE;
    }
-    return dst-frame;
+    return dst - frame;
 }

 AVCodec ff_adpcm_adx_encoder = {
@ -204,6 +172,7 @@ AVCodec ff_adpcm_adx_encoder = {
    .init           = adx_encode_init,
    .encode         = adx_encode_frame,
    .close          = adx_encode_close,
-    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
-    .long_name = NULL_IF_CONFIG_SMALL("SEGA CRI ADX ADPCM"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
+    .long_name      = NULL_IF_CONFIG_SMALL("SEGA CRI ADX ADPCM"),
 };
--- a/libavcodec/arm/rv34dsp_init_neon.c
+++ b/libavcodec/arm/rv34dsp_init_neon.c
@ -25,12 +25,9 @@

 void ff_rv34_inv_transform_neon(DCTELEM *block);
 void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
-void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q);

 void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 {
    c->rv34_inv_transform_tab[0] = ff_rv34_inv_transform_neon;
    c->rv34_inv_transform_tab[1] = ff_rv34_inv_transform_noround_neon;
-
-    c->rv34_dequant4x4 = ff_rv34_dequant4x4_neon;
 }
--- a/libavcodec/arm/rv34dsp_neon.S
+++ b/libavcodec/arm/rv34dsp_neon.S
@ -107,27 +107,3 @@ function ff_rv34_inv_transform_noround_neon, export=1
        vst4.16         {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
        bx              lr
 endfunc
-
-function ff_rv34_dequant4x4_neon, export=1
-        mov             r3,  r0
-        mov             r12, #16
-        vdup.16         q0,  r2
-        vmov.16         d0[0], r1
-        vld1.16         {d2},     [r0,:64], r12
-        vld1.16         {d4},     [r0,:64], r12
-        vld1.16         {d6},     [r0,:64], r12
-        vld1.16         {d16},    [r0,:64], r12
-        vmull.s16       q1,  d2,  d0
-        vmull.s16       q2,  d4,  d1
-        vmull.s16       q3,  d6,  d1
-        vmull.s16       q8,  d16, d1
-        vqrshrn.s32     d2,  q1,  #4
-        vqrshrn.s32     d4,  q2,  #4
-        vqrshrn.s32     d6,  q3,  #4
-        vqrshrn.s32     d16, q8,  #4
-        vst1.16         {d2},     [r3,:64], r12
-        vst1.16         {d4},     [r3,:64], r12
-        vst1.16         {d6},     [r3,:64], r12
-        vst1.16         {d16},    [r3,:64], r12
-        bx              lr
-endfunc
--- a/libavcodec/bytestream.h
+++ b/libavcodec/bytestream.h
@ -39,11 +39,15 @@ static av_always_inline void bytestream_put_ ##name(uint8_t **b, const type valu
    write(*b, value);\
    (*b) += bytes;\
 }\
+static av_always_inline type bytestream2_get_ ## name ## u(GetByteContext *g)\
+{\
+    return bytestream_get_ ## name(&g->buffer);\
+}\
 static av_always_inline type bytestream2_get_ ## name(GetByteContext *g)\
 {\
    if (g->buffer_end - g->buffer < bytes)\
        return 0;\
-    return bytestream_get_ ## name(&g->buffer);\
+    return bytestream2_get_ ## name ## u(g);\
 }\
 static av_always_inline type bytestream2_peek_ ## name(GetByteContext *g)\
 {\
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@ -109,10 +109,6 @@ void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size){
    c->low= 0;
    c->range= 0x1FE;
    c->outstanding_count= 0;
-#ifdef STRICT_LIMITS
-    c->sym_count =0;
-#endif
-
    c->pb.bit_left++; //avoids firstBitFlag
 }

@ -183,10 +179,6 @@ static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
    }

    renorm_cabac_encoder(c);
-
-#ifdef STRICT_LIMITS
-    c->symCount++;
-#endif
 }

 /**
@ -208,10 +200,6 @@ static void put_cabac_bypass(CABACContext *c, int bit){
        put_cabac_bit(c, 1);
        c->low -= 0x400;
    }
-
-#ifdef STRICT_LIMITS
-    c->symCount++;
-#endif
 }

 /**
@ -236,10 +224,6 @@ static int put_cabac_terminate(CABACContext *c, int bit){
        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
    }

-#ifdef STRICT_LIMITS
-    c->symCount++;
-#endif
-
    return (put_bits_count(&c->pb)+7)>>3;
 }

@ -365,21 +349,6 @@ START_TIMER
            av_log(NULL, AV_LOG_ERROR, "CABAC failure at %d\n", i);
 STOP_TIMER("get_cabac")
    }
-#if 0
-    for(i=0; i<SIZE; i++){
-START_TIMER
-        if( r[i] != get_cabac_u(&c, state, (i&1) ? 6 : 7, 3, i&1) )
-            av_log(NULL, AV_LOG_ERROR, "CABAC unary (truncated) binarization failure at %d\n", i);
-STOP_TIMER("get_cabac_u")
-    }
-
-    for(i=0; i<SIZE; i++){
-START_TIMER
-        if( r[i] != get_cabac_ueg(&c, state, 3, 0, 1, 2))
-            av_log(NULL, AV_LOG_ERROR, "CABAC unary (truncated) binarization failure at %d\n", i);
-STOP_TIMER("get_cabac_ueg")
-    }
-#endif
    if(!get_cabac_terminate(&c))
        av_log(NULL, AV_LOG_ERROR, "where's the Terminator?\n");

--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@ -41,9 +41,6 @@ typedef struct CABACContext{
    int low;
    int range;
    int outstanding_count;
-#ifdef STRICT_LIMITS
-    int symCount;
-#endif
    const uint8_t *bytestream_start;
    const uint8_t *bytestream;
    const uint8_t *bytestream_end;
@ -216,62 +213,4 @@ static int av_unused get_cabac_terminate(CABACContext *c){
    }
 }

-#if 0
-/**
- * Get (truncated) unary binarization.
- */
-static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
-    int i;
-
-    for(i=0; i<max; i++){
-        if(get_cabac(c, state)==0)
-            return i;
-
-        if(i< max_index) state++;
-    }
-
-    return truncated ? max : -1;
-}
-
-/**
- * get unary exp golomb k-th order binarization.
- */
-static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
-    int i, v;
-    int m= 1<<k;
-
-    if(get_cabac(c, state)==0)
-        return 0;
-
-    if(0 < max_index) state++;
-
-    for(i=1; i<max; i++){
-        if(get_cabac(c, state)==0){
-            if(is_signed && get_cabac_bypass(c)){
-                return -i;
-            }else
-                return i;
-        }
-
-        if(i < max_index) state++;
-    }
-
-    while(get_cabac_bypass(c)){
-        i+= m;
-        m+= m;
-    }
-
-    v=0;
-    while(m>>=1){
-        v+= v + get_cabac_bypass(c);
-    }
-    i += v;
-
-    if(is_signed && get_cabac_bypass(c)){
-        return -i;
-    }else
-        return i;
-}
-#endif /* 0 */
-
 #endif /* AVCODEC_CABAC_H */
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@ -578,9 +578,8 @@ static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int

        for (i = 0; i < 8; i++) {
            DCTELEM *block = ctx->blocks[i];
-            int last_index, overflow;
-            int n = dnxhd_switch_matrix(ctx, i);
-            last_index = ctx->m.dct_quantize(&ctx->m, block, 4&(2*i), qscale, &overflow);
+            int overflow, n = dnxhd_switch_matrix(ctx, i);
+            int last_index = ctx->m.dct_quantize(&ctx->m, block, 4&(2*i), qscale, &overflow);
            //START_TIMER;
            dnxhd_encode_block(ctx, block, last_index, n);
            //STOP_TIMER("encode_block");
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@ -4051,7 +4051,7 @@ static int decode_frame(AVCodecContext *avctx,
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
-    int buf_index;
+    int buf_index = 0;
    Picture *out;
    int i, out_idx;

@ -4081,7 +4081,7 @@ static int decode_frame(AVCodecContext *avctx,
            *pict= *(AVFrame*)out;
        }

-        return buf_size;
+        return buf_index;
    }
    if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
        int cnt= buf[5]&0x1f;
@ -4112,7 +4112,6 @@ not_extra:

    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        av_assert0(buf_index <= buf_size);
-        buf_size = buf_index;
        goto out;
    }

@ -4193,9 +4192,7 @@ int main(void){

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
-        int j, s;
-
-        s= show_bits(&gb, 24);
+        int j, s = show_bits(&gb, 24);

        {START_TIMER
        j= get_ue_golomb(&gb);
@ -4218,9 +4215,7 @@ int main(void){

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
-        int j, s;
-
-        s= show_bits(&gb, 24);
+        int j, s = show_bits(&gb, 24);

        {START_TIMER
        j= get_se_golomb(&gb);
--- a/libavcodec/indeo5.c
+++ b/libavcodec/indeo5.c
@ -760,7 +760,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,

    switch_buffers(ctx);

-    //START_TIMER;
+    //{ START_TIMER;

    if (ctx->frame_type != FRAMETYPE_NULL) {
        for (p = 0; p < 3; p++) {
@ -775,7 +775,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
        }
    }

-    //STOP_TIMER("decode_planes");
+    //STOP_TIMER("decode_planes"); }

    if (ctx->frame.data[0])
        avctx->release_buffer(avctx, &ctx->frame);
--- a/libavcodec/libspeexenc.c
+++ b/libavcodec/libspeexenc.c
@ -83,7 +83,8 @@ typedef struct {
    int abr;                    ///< flag to enable ABR
    int pkt_frame_count;        ///< frame count for the current packet
    int lookahead;              ///< encoder delay
-    int sample_count;           ///< total sample count (used for pts)
+    int64_t next_pts;           ///< next pts, in sample_rate time base
+    int pkt_sample_count;       ///< sample count in the current packet
 } LibSpeexEncContext;

 static av_cold void print_enc_params(AVCodecContext *avctx,
@ -201,7 +202,7 @@ static av_cold int encode_init(AVCodecContext *avctx)

    /* set encoding delay */
    speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &s->lookahead);
-    s->sample_count = -s->lookahead;
+    s->next_pts = -s->lookahead;

    /* create header packet bytes from header struct */
    /* note: libspeex allocates the memory for header_data, which is freed
@ -235,7 +236,6 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size,
 {
    LibSpeexEncContext *s = avctx->priv_data;
    int16_t *samples      = data;
-    int sample_count      = s->sample_count;

    if (data) {
        /* encode Speex frame */
@ -243,7 +243,7 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size,
            speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
        speex_encode_int(s->enc_state, samples, &s->bits);
        s->pkt_frame_count++;
-        s->sample_count += avctx->frame_size;
+        s->pkt_sample_count += avctx->frame_size;
    } else {
        /* handle end-of-stream */
        if (!s->pkt_frame_count)
@ -259,8 +259,10 @@ static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size,
    if (s->pkt_frame_count == s->frames_per_packet) {
        s->pkt_frame_count = 0;
        avctx->coded_frame->pts =
-            av_rescale_q(sample_count, (AVRational){ 1, avctx->sample_rate },
+            av_rescale_q(s->next_pts, (AVRational){ 1, avctx->sample_rate },
                         avctx->time_base);
+        s->next_pts += s->pkt_sample_count;
+        s->pkt_sample_count = 0;
        if (buf_size > speex_bits_nbytes(&s->bits)) {
            int ret = speex_bits_write(&s->bits, frame, buf_size);
            speex_bits_reset(&s->bits);
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@ -212,7 +212,7 @@ static int rv34_decode_cbp(GetBitContext *gb, RV34VLC *vlc, int table)
 /**
 * Get one coefficient value from the bitstream and store it.
 */
-static inline void decode_coeff(DCTELEM *dst, int coef, int esc, GetBitContext *gb, VLC* vlc)
+static inline void decode_coeff(DCTELEM *dst, int coef, int esc, GetBitContext *gb, VLC* vlc, int q)
 {
    if(coef){
        if(coef == esc){
@ -225,14 +225,14 @@ static inline void decode_coeff(DCTELEM *dst, int coef, int esc, GetBitContext *
        }
        if(get_bits1(gb))
            coef = -coef;
-        *dst = coef;
+        *dst = (coef*q + 8) >> 4;
    }
 }

 /**
 * Decode 2x2 subblock of coefficients.
 */
-static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2, GetBitContext *gb, VLC *vlc)
+static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2, GetBitContext *gb, VLC *vlc, int q)
 {
    int coeffs[4];

@ -240,15 +240,35 @@ static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2,
    coeffs[1] = modulo_three_table[code][1];
    coeffs[2] = modulo_three_table[code][2];
    coeffs[3] = modulo_three_table[code][3];
-    decode_coeff(dst  , coeffs[0], 3, gb, vlc);
+    decode_coeff(dst  , coeffs[0], 3, gb, vlc, q);
    if(is_block2){
-        decode_coeff(dst+8, coeffs[1], 2, gb, vlc);
-        decode_coeff(dst+1, coeffs[2], 2, gb, vlc);
+        decode_coeff(dst+8, coeffs[1], 2, gb, vlc, q);
+        decode_coeff(dst+1, coeffs[2], 2, gb, vlc, q);
    }else{
-        decode_coeff(dst+1, coeffs[1], 2, gb, vlc);
-        decode_coeff(dst+8, coeffs[2], 2, gb, vlc);
+        decode_coeff(dst+1, coeffs[1], 2, gb, vlc, q);
+        decode_coeff(dst+8, coeffs[2], 2, gb, vlc, q);
    }
-    decode_coeff(dst+9, coeffs[3], 2, gb, vlc);
+    decode_coeff(dst+9, coeffs[3], 2, gb, vlc, q);
+}
+
+/**
+ * Decode 2x2 subblock of coefficients, using a separate quantizer per position.
+ *
+ * Same layout as decode_subblock(), but the scale factor handed to
+ * decode_coeff() differs by position: q_dc is used for dst[0], q_ac1 for
+ * dst[1] and dst[8], and q_ac2 for dst[9].
+ * When is_block2 is set, coefficients 1 and 2 are read in swapped order
+ * (dst[8] before dst[1]).
+ */
+static inline void decode_subblock3(DCTELEM *dst, int code, const int is_block2, GetBitContext *gb, VLC *vlc,
+                                    int q_dc, int q_ac1, int q_ac2)
+{
+    int coeffs[4];
+
+    coeffs[0] = modulo_three_table[code][0];
+    coeffs[1] = modulo_three_table[code][1];
+    coeffs[2] = modulo_three_table[code][2];
+    coeffs[3] = modulo_three_table[code][3];
+    decode_coeff(dst  , coeffs[0], 3, gb, vlc, q_dc);
+    if(is_block2){
+        decode_coeff(dst+8, coeffs[1], 2, gb, vlc, q_ac1);
+        decode_coeff(dst+1, coeffs[2], 2, gb, vlc, q_ac1);
+    }else{
+        decode_coeff(dst+1, coeffs[1], 2, gb, vlc, q_ac1);
+        decode_coeff(dst+8, coeffs[2], 2, gb, vlc, q_ac1);
+    }
+    decode_coeff(dst+9, coeffs[3], 2, gb, vlc, q_ac2);
 }

 /**
@ -262,7 +282,7 @@ static inline void decode_subblock(DCTELEM *dst, int code, const int is_block2,
 *  o--o
 */

-static inline void rv34_decode_block(DCTELEM *dst, GetBitContext *gb, RV34VLC *rvlc, int fc, int sc)
+static inline void rv34_decode_block(DCTELEM *dst, GetBitContext *gb, RV34VLC *rvlc, int fc, int sc, int q_dc, int q_ac1, int q_ac2)
 {
    int code, pattern;

@ -271,39 +291,23 @@ static inline void rv34_decode_block(DCTELEM *dst, GetBitContext *gb, RV34VLC *r
    pattern = code & 0x7;

    code >>= 3;
-    decode_subblock(dst, code, 0, gb, &rvlc->coefficient);
+    decode_subblock3(dst, code, 0, gb, &rvlc->coefficient, q_dc, q_ac1, q_ac2);

    if(pattern & 4){
        code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2);
-        decode_subblock(dst + 2, code, 0, gb, &rvlc->coefficient);
+        decode_subblock(dst + 2, code, 0, gb, &rvlc->coefficient, q_ac2);
    }
    if(pattern & 2){ // Looks like coefficients 1 and 2 are swapped for this block
        code = get_vlc2(gb, rvlc->second_pattern[sc].table, 9, 2);
-        decode_subblock(dst + 8*2, code, 1, gb, &rvlc->coefficient);
+        decode_subblock(dst + 8*2, code, 1, gb, &rvlc->coefficient, q_ac2);
    }
    if(pattern & 1){
        code = get_vlc2(gb, rvlc->third_pattern[sc].table, 9, 2);
-        decode_subblock(dst + 8*2+2, code, 0, gb, &rvlc->coefficient);
+        decode_subblock(dst + 8*2+2, code, 0, gb, &rvlc->coefficient, q_ac2);
    }

 }

-/**
- * Dequantize 4x4 block of DC values for 16x16 macroblock.
- * @todo optimize
- */
-static inline void rv34_dequant4x4_16x16(DCTELEM *block, int Qdc, int Q)
-{
-    int i;
-
-    for(i = 0; i < 3; i++)
-         block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Qdc + 8) >> 4;
-    for(; i < 16; i++)
-         block[rv34_dezigzag[i]] = (block[rv34_dezigzag[i]] * Q + 8) >> 4;
-}
-/** @} */ //block functions
-
-
 /**
 * @name RV30/40 bitstream parsing
 * @{
@ -676,8 +680,9 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
    srcY += src_y * s->linesize + src_x;
    srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
    srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
-    if(   (unsigned)(src_x - !!lx*2) > s->h_edge_pos - !!lx*2 - (width <<3) - 4
-       || (unsigned)(src_y - !!ly*2) > s->v_edge_pos - !!ly*2 - (height<<3) - 4){
+    if(s->h_edge_pos - (width << 3) < 6 || s->v_edge_pos - (height << 3) < 6 ||
+       (unsigned)(src_x - !!lx*2) > s->h_edge_pos - !!lx*2 - (width <<3) - 4 ||
+       (unsigned)(src_y - !!ly*2) > s->v_edge_pos - !!ly*2 - (height<<3) - 4) {
        uint8_t *uvbuf = s->edge_emu_buffer + 22 * s->linesize;

        srcY -= 2 + 2*s->linesize;
@ -1097,6 +1102,7 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types)
    MpegEncContext *s = &r->s;
    GetBitContext *gb = &s->gb;
    int cbp, cbp2;
+    int q_dc, q_ac;
    int i, blknum, blkoff;
    LOCAL_ALIGNED_16(DCTELEM, block16, [64]);
    int luma_dc_quant;
@ -1133,31 +1139,34 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types)

    luma_dc_quant = r->block_type == RV34_MB_P_MIX16x16 ? r->luma_dc_quant_p[s->qscale] : r->luma_dc_quant_i[s->qscale];
    if(r->is16){
+        q_dc = rv34_qscale_tab[luma_dc_quant];
+        q_ac = rv34_qscale_tab[s->qscale];
        memset(block16, 0, 64 * sizeof(*block16));
-        rv34_decode_block(block16, gb, r->cur_vlcs, 3, 0);
-        rv34_dequant4x4_16x16(block16, rv34_qscale_tab[luma_dc_quant],rv34_qscale_tab[s->qscale]);
+        rv34_decode_block(block16, gb, r->cur_vlcs, 3, 0, q_dc, q_dc, q_ac);
        r->rdsp.rv34_inv_transform_tab[1](block16);
    }

+    q_ac = rv34_qscale_tab[s->qscale];
    for(i = 0; i < 16; i++, cbp >>= 1){
        if(!r->is16 && !(cbp & 1)) continue;
        blknum = ((i & 2) >> 1) + ((i & 8) >> 2);
        blkoff = ((i & 1) << 2) + ((i & 4) << 3);
        if(cbp & 1)
-            rv34_decode_block(s->block[blknum] + blkoff, gb, r->cur_vlcs, r->luma_vlc, 0);
-        r->rdsp.rv34_dequant4x4(s->block[blknum] + blkoff, rv34_qscale_tab[s->qscale],rv34_qscale_tab[s->qscale]);
+            rv34_decode_block(s->block[blknum] + blkoff, gb,
+                              r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac);
        if(r->is16) //FIXME: optimize
            s->block[blknum][blkoff] = block16[(i & 3) | ((i & 0xC) << 1)];
        r->rdsp.rv34_inv_transform_tab[0](s->block[blknum] + blkoff);
    }
    if(r->block_type == RV34_MB_P_MIX16x16)
        r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 1);
+    q_dc = rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]];
+    q_ac = rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]];
    for(; i < 24; i++, cbp >>= 1){
        if(!(cbp & 1)) continue;
        blknum = ((i & 4) >> 2) + 4;
        blkoff = ((i & 1) << 2) + ((i & 2) << 4);
-        rv34_decode_block(s->block[blknum] + blkoff, gb, r->cur_vlcs, r->chroma_vlc, 1);
-        r->rdsp.rv34_dequant4x4(s->block[blknum] + blkoff, rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]],rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]]);
+        rv34_decode_block(s->block[blknum] + blkoff, gb, r->cur_vlcs, r->chroma_vlc, 1, q_dc, q_ac, q_ac);
        r->rdsp.rv34_inv_transform_tab[0](s->block[blknum] + blkoff);
    }
    if (IS_INTRA(s->current_picture_ptr->f.mb_type[mb_pos]))
--- a/libavcodec/rv34data.h
+++ b/libavcodec/rv34data.h
@ -100,16 +100,6 @@ static const uint16_t rv34_qscale_tab[32] = {
 963, 1074, 1212, 1392, 1566, 1708, 1978, 2211
 };

-/**
- * 4x4 dezigzag pattern
- */
-static const uint8_t rv34_dezigzag[16] = {
-  0,  1,  8, 16,
-  9,  2,  3, 10,
- 17, 24, 25, 18,
- 11, 19, 26, 27
-};
-
 /**
 * tables used to translate a quantizer value into a VLC set for decoding
 * The first table is used for intraframes.
--- a/libavcodec/rv34dsp.c
+++ b/libavcodec/rv34dsp.c
@ -100,26 +100,10 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
 /** @} */ // transform


-/**
- * Dequantize ordinary 4x4 block.
- */
-void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q);
-static void rv34_dequant4x4_c(DCTELEM *block, int Qdc, int Q)
-{
-    int i, j;
-
-    block[0] = (block[0] * Qdc + 8) >> 4;
-    for (i = 0; i < 4; i++)
-        for (j = !i; j < 4; j++)
-            block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
-}
-
 av_cold void ff_rv34dsp_init(RV34DSPContext *c, DSPContext* dsp) {
    c->rv34_inv_transform_tab[0] = rv34_inv_transform_c;
    c->rv34_inv_transform_tab[1] = rv34_inv_transform_noround_c;

-    c->rv34_dequant4x4 = rv34_dequant4x4_c;
-
    if (HAVE_NEON)
        ff_rv34dsp_init_neon(c, dsp);
 }
--- a/libavcodec/rv34dsp.h
+++ b/libavcodec/rv34dsp.h
@ -56,7 +56,6 @@ typedef struct RV34DSPContext {
    h264_chroma_mc_func avg_chroma_pixels_tab[3];
    rv40_weight_func rv40_weight_pixels_tab[2];
    rv34_inv_transform_func rv34_inv_transform_tab[2];
-    void (*rv34_dequant4x4)(DCTELEM *block, int Qdc, int Q);
    rv40_weak_loop_filter_func rv40_weak_loop_filter[2];
    rv40_strong_loop_filter_func rv40_strong_loop_filter[2];
    rv40_loop_filter_strength_func rv40_loop_filter_strength[2];
--- a/libavcodec/ulti.c
+++ b/libavcodec/ulti.c
@ -38,16 +38,9 @@ typedef struct UltimotionDecodeContext {
    int width, height, blocks;
    AVFrame frame;
    const uint8_t *ulti_codebook;
+    GetByteContext gb;
 } UltimotionDecodeContext;

-#define CHECK_OVERREAD_SIZE(size) \
-    do { \
-        if (buf_end - buf < (size)) { \
-            av_log(avctx, AV_LOG_ERROR, "Insufficient data\n"); \
-            return AVERROR_INVALIDDATA; \
-        } \
-    } while(0)
-
 static av_cold int ulti_decode_init(AVCodecContext *avctx)
 {
    UltimotionDecodeContext *s = avctx->priv_data;
@ -232,7 +225,6 @@ static int ulti_decode_frame(AVCodecContext *avctx,
    int i;
    int skip;
    int tmp;
-    const uint8_t *buf_end = buf + buf_size;

    s->frame.reference = 3;
    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
@ -241,18 +233,20 @@ static int ulti_decode_frame(AVCodecContext *avctx,
        return -1;
    }

+    bytestream2_init(&s->gb, buf, buf_size);
+
    while(!done) {
        int idx;
        if(blocks >= s->blocks || y >= s->height)
            break;//all blocks decoded

-        CHECK_OVERREAD_SIZE(1);
-        idx = *buf++;
+        if (bytestream2_get_bytes_left(&s->gb) < 1)
+            goto err;
+        idx = bytestream2_get_byteu(&s->gb);
        if((idx & 0xF8) == 0x70) {
            switch(idx) {
            case 0x70: //change modifier
-                CHECK_OVERREAD_SIZE(1);
-                modifier = *buf++;
+                modifier = bytestream2_get_byte(&s->gb);
                if(modifier>1)
                    av_log(avctx, AV_LOG_INFO, "warning: modifier must be 0 or 1, got %i\n", modifier);
                break;
@ -266,8 +260,7 @@ static int ulti_decode_frame(AVCodecContext *avctx,
                done = 1;
                break;
            case 0x74: //skip some blocks
-                CHECK_OVERREAD_SIZE(1);
-                skip = *buf++;
+                skip = bytestream2_get_byte(&s->gb);
                if ((blocks + skip) >= s->blocks)
                    break;
                blocks += skip;
@ -294,8 +287,7 @@ static int ulti_decode_frame(AVCodecContext *avctx,
            } else {
                cf = 0;
                if (idx) {
-                    CHECK_OVERREAD_SIZE(1);
-                    chroma = *buf++;
+                    chroma = bytestream2_get_byte(&s->gb);
                }
            }
            for (i = 0; i < 4; i++) { // for every subblock
@ -303,15 +295,13 @@ static int ulti_decode_frame(AVCodecContext *avctx,
                if(!code) //skip subblock
                    continue;
                if(cf) {
-                    CHECK_OVERREAD_SIZE(1);
-                    chroma = *buf++;
+                    chroma = bytestream2_get_byte(&s->gb);
                }
                tx = x + block_coords[i * 2];
                ty = y + block_coords[(i * 2) + 1];
                switch(code) {
                case 1:
-                    CHECK_OVERREAD_SIZE(1);
-                    tmp = *buf++;
+                    tmp = bytestream2_get_byte(&s->gb);

                    angle = angle_by_index[(tmp >> 6) & 0x3];

@ -331,8 +321,7 @@ static int ulti_decode_frame(AVCodecContext *avctx,

                case 2:
                    if (modifier) { // unpack four luma samples
-                        CHECK_OVERREAD_SIZE(3);
-                        tmp = bytestream_get_be24(&buf);
+                        tmp = bytestream2_get_be24(&s->gb);

                        Y[0] = (tmp >> 18) & 0x3F;
                        Y[1] = (tmp >> 12) & 0x3F;
@ -340,8 +329,7 @@ static int ulti_decode_frame(AVCodecContext *avctx,
                        Y[3] = tmp & 0x3F;
                        angle = 16;
                    } else { // retrieve luma samples from codebook
-                        CHECK_OVERREAD_SIZE(2);
-                        tmp = bytestream_get_be16(&buf);
+                        tmp = bytestream2_get_be16(&s->gb);

                        angle = (tmp >> 12) & 0xF;
                        tmp &= 0xFFF;
@ -357,27 +345,27 @@ static int ulti_decode_frame(AVCodecContext *avctx,
                    if (modifier) { // all 16 luma samples
                        uint8_t Luma[16];

-                        CHECK_OVERREAD_SIZE(12);
-
-                        tmp = bytestream_get_be24(&buf);
+                        if (bytestream2_get_bytes_left(&s->gb) < 12)
+                            goto err;
+                        tmp = bytestream2_get_be24u(&s->gb);
                        Luma[0] = (tmp >> 18) & 0x3F;
                        Luma[1] = (tmp >> 12) & 0x3F;
                        Luma[2] = (tmp >> 6) & 0x3F;
                        Luma[3] = tmp & 0x3F;

-                        tmp = bytestream_get_be24(&buf);
+                        tmp = bytestream2_get_be24u(&s->gb);
                        Luma[4] = (tmp >> 18) & 0x3F;
                        Luma[5] = (tmp >> 12) & 0x3F;
                        Luma[6] = (tmp >> 6) & 0x3F;
                        Luma[7] = tmp & 0x3F;

-                        tmp = bytestream_get_be24(&buf);
+                        tmp = bytestream2_get_be24u(&s->gb);
                        Luma[8] = (tmp >> 18) & 0x3F;
                        Luma[9] = (tmp >> 12) & 0x3F;
                        Luma[10] = (tmp >> 6) & 0x3F;
                        Luma[11] = tmp & 0x3F;

-                        tmp = bytestream_get_be24(&buf);
+                        tmp = bytestream2_get_be24u(&s->gb);
                        Luma[12] = (tmp >> 18) & 0x3F;
                        Luma[13] = (tmp >> 12) & 0x3F;
                        Luma[14] = (tmp >> 6) & 0x3F;
@ -385,22 +373,23 @@ static int ulti_decode_frame(AVCodecContext *avctx,

                        ulti_convert_yuv(&s->frame, tx, ty, Luma, chroma);
                    } else {
-                        CHECK_OVERREAD_SIZE(4);
-                        tmp = *buf++;
+                        if (bytestream2_get_bytes_left(&s->gb) < 4)
+                            goto err;
+                        tmp = bytestream2_get_byteu(&s->gb);
                        if(tmp & 0x80) {
                            angle = (tmp >> 4) & 0x7;
-                            tmp = (tmp << 8) + *buf++;
+                            tmp = (tmp << 8) + bytestream2_get_byteu(&s->gb);
                            Y[0] = (tmp >> 6) & 0x3F;
                            Y[1] = tmp & 0x3F;
-                            Y[2] = (*buf++) & 0x3F;
-                            Y[3] = (*buf++) & 0x3F;
+                            Y[2] = bytestream2_get_byteu(&s->gb) & 0x3F;
+                            Y[3] = bytestream2_get_byteu(&s->gb) & 0x3F;
                            ulti_grad(&s->frame, tx, ty, Y, chroma, angle); //draw block
                        } else { // some patterns
                            int f0, f1;
-                            f0 = *buf++;
+                            f0 = bytestream2_get_byteu(&s->gb);
                            f1 = tmp;
-                            Y[0] = (*buf++) & 0x3F;
-                            Y[1] = (*buf++) & 0x3F;
+                            Y[0] = bytestream2_get_byteu(&s->gb) & 0x3F;
+                            Y[1] = bytestream2_get_byteu(&s->gb) & 0x3F;
                            ulti_pattern(&s->frame, tx, ty, f1, f0, Y[0], Y[1], chroma);
                        }
                    }
@ -422,6 +411,11 @@ static int ulti_decode_frame(AVCodecContext *avctx,
    *(AVFrame*)data= s->frame;

    return buf_size;
+
+err:
+    av_log(avctx, AV_LOG_ERROR,
+           "Insufficient data\n");
+    return AVERROR_INVALIDDATA;
 }

 AVCodec ff_ulti_decoder = {
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@ -568,6 +568,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
    }

    if (v->rangeredfrm || (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+        || s->h_edge_pos < 22 || v_edge_pos < 22
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx&3) - 16 - s->mspel * 3
        || (unsigned)(src_y - s->mspel) > v_edge_pos    - (my&3) - 16 - s->mspel * 3) {
        uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
@ -799,6 +800,7 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir)
    if (fieldmv && (src_y & 1) && src_y < 4)
        src_y--;
    if (v->rangeredfrm || (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+        || s->h_edge_pos < 13 || v_edge_pos < 23
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 8 - s->mspel * 2
        || (unsigned)(src_y - (s->mspel << fieldmv)) > v_edge_pos - (my & 3) - ((8 + s->mspel * 2) << fieldmv)) {
        srcY -= s->mspel * (1 + (s->linesize << fieldmv));
@ -998,6 +1000,7 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
    }

    if (v->rangeredfrm || (v->mv_mode == MV_PMODE_INTENSITY_COMP)
+        || s->h_edge_pos < 18 || v_edge_pos < 18
        || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
        || (unsigned)uvsrc_y > (v_edge_pos    >> 1) - 9) {
        s->dsp.emulated_edge_mc(s->edge_emu_buffer     , srcU, s->uvlinesize,
@ -1102,6 +1105,7 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
        if (fieldmv && (uvsrc_y & 1) && uvsrc_y < 2)
            uvsrc_y--;
        if ((v->mv_mode == MV_PMODE_INTENSITY_COMP)
+            || s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv)
            || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5
            || (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) {
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcU, s->uvlinesize,
@ -2006,7 +2010,7 @@ static void vc1_interp_mc(VC1Context *v)
        srcV = s->edge_emu_buffer + 18 * s->linesize;
    }

-    if (v->rangeredfrm
+    if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22
        || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 16 - s->mspel * 3
        || (unsigned)(src_y - s->mspel) > v_edge_pos    - (my & 3) - 16 - s->mspel * 3) {
        uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@ -69,19 +69,17 @@

 static const int desired_video_buffers = 256;

-enum io_method {
-    io_read,
-    io_mmap,
-    io_userptr
-};
+#define V4L_ALLFORMATS  3
+#define V4L_RAWFORMATS  1
+#define V4L_COMPFORMATS 2

 struct video_data {
    AVClass *class;
    int fd;
    int frame_format; /* V4L2_PIX_FMT_* */
-    enum io_method io_method;
    int width, height;
    int frame_size;
+    int interlaced;
    int top_field_first;

    int buffers;
@ -89,8 +87,10 @@ struct video_data {
    unsigned int *buf_len;
    char *standard;
    int channel;
-    char *video_size; /**< String describing video size, set by a private option. */
+    char *video_size;   /**< String describing video size,
+                             set by a private option. */
    char *pixel_format; /**< Set by a private option. */
+    int list_format;    /**< Set by a private option. */
    char *framerate;    /**< Set by a private option. */
 };

@ -124,7 +124,7 @@ static struct fmt_map fmt_conversion_table[] = {
    { PIX_FMT_NONE,    CODEC_ID_MJPEG,    V4L2_PIX_FMT_JPEG    },
 };

-static int device_open(AVFormatContext *ctx, uint32_t *capabilities)
+static int device_open(AVFormatContext *ctx)
 {
    struct v4l2_capability cap;
    int fd;
@ -137,11 +137,15 @@ static int device_open(AVFormatContext *ctx, uint32_t *capabilities)
    if (ctx->flags & AVFMT_FLAG_NONBLOCK) {
        flags |= O_NONBLOCK;
    }
+
    fd = v4l2_open(ctx->filename, flags, 0);
    if (fd < 0) {
+        err = errno;
+
        av_log(ctx, AV_LOG_ERROR, "Cannot open video device %s : %s\n",
-                 ctx->filename, strerror(errno));
-        return AVERROR(errno);
+               ctx->filename, strerror(err));
+
+        return AVERROR(err);
    }
 #if CONFIG_LIBV4L2
    fd_libv4l = v4l2_fd_open(fd, 0);
@ -155,53 +159,80 @@ static int device_open(AVFormatContext *ctx, uint32_t *capabilities)
 #endif

    res = v4l2_ioctl(fd, VIDIOC_QUERYCAP, &cap);
-    // ENOIOCTLCMD definition only availble on __KERNEL__
-    if (res < 0 && ((err = errno) == 515)) {
-        av_log(ctx, AV_LOG_ERROR, "QUERYCAP not implemented, probably V4L device but not supporting V4L2\n");
-        v4l2_close(fd);
-
-        return AVERROR(515);
-    }
    if (res < 0) {
+        err = errno;
        av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QUERYCAP): %s\n",
-                 strerror(errno));
-        v4l2_close(fd);
-        return AVERROR(err);
+               strerror(err));
+
+        goto fail;
+    }
+
+    av_log(ctx, AV_LOG_VERBOSE, "[%d]Capabilities: %x\n",
+           fd, cap.capabilities);
+
+    if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
+        av_log(ctx, AV_LOG_ERROR, "Not a video capture device.\n");
+        err = ENODEV;
+
+        goto fail;
    }
-    if ((cap.capabilities & V4L2_CAP_VIDEO_CAPTURE) == 0) {
-        av_log(ctx, AV_LOG_ERROR, "Not a video capture device\n");
-        v4l2_close(fd);
-        return AVERROR(ENODEV);
+
+    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
+        av_log(ctx, AV_LOG_ERROR,
+               "The device does not support the streaming I/O method.\n");
+        err = ENOSYS;
+
+        goto fail;
    }
-    *capabilities = cap.capabilities;

    return fd;
+
+fail:
+    v4l2_close(fd);
+    return AVERROR(err);
 }

+/**
+ * Ask the driver for the given frame size and pixel format (VIDIOC_S_FMT).
+ *
+ * The field order is left to the driver (V4L2_FIELD_ANY). If the driver
+ * adjusts the frame size, *width and *height are updated to what it chose;
+ * if it reports V4L2_FIELD_INTERLACED, s->interlaced is set.
+ *
+ * @return the result of the VIDIOC_S_FMT ioctl, or -1 if the driver
+ *         substituted a different pixel format
+ */
-static int device_init(AVFormatContext *ctx, int *width, int *height, uint32_t pix_fmt)
+static int device_init(AVFormatContext *ctx, int *width, int *height,
+                       uint32_t pix_fmt)
 {
    struct video_data *s = ctx->priv_data;
    int fd = s->fd;
-    struct v4l2_format fmt = {0};
+    struct v4l2_format fmt;
+    struct v4l2_pix_format *pix = &fmt.fmt.pix;
+
    int res;

+    memset(&fmt, 0, sizeof(struct v4l2_format));
+
    fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-    fmt.fmt.pix.width = *width;
-    fmt.fmt.pix.height = *height;
-    fmt.fmt.pix.pixelformat = pix_fmt;
-    fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
+    pix->width = *width;
+    pix->height = *height;
+    pix->pixelformat = pix_fmt;
+    pix->field = V4L2_FIELD_ANY;
+
    res = v4l2_ioctl(fd, VIDIOC_S_FMT, &fmt);
+
    if ((*width != fmt.fmt.pix.width) || (*height != fmt.fmt.pix.height)) {
-        av_log(ctx, AV_LOG_INFO, "The V4L2 driver changed the video from %dx%d to %dx%d\n", *width, *height, fmt.fmt.pix.width, fmt.fmt.pix.height);
+        av_log(ctx, AV_LOG_INFO,
+               "The V4L2 driver changed the video from %dx%d to %dx%d\n",
+               *width, *height, fmt.fmt.pix.width, fmt.fmt.pix.height);
        *width = fmt.fmt.pix.width;
        *height = fmt.fmt.pix.height;
    }

    if (pix_fmt != fmt.fmt.pix.pixelformat) {
-        av_log(ctx, AV_LOG_DEBUG, "The V4L2 driver changed the pixel format from 0x%08X to 0x%08X\n", pix_fmt, fmt.fmt.pix.pixelformat);
+        av_log(ctx, AV_LOG_DEBUG,
+               "The V4L2 driver changed the pixel format "
+               "from 0x%08X to 0x%08X\n",
+               pix_fmt, fmt.fmt.pix.pixelformat);
        res = -1;
    }

+    if (fmt.fmt.pix.field == V4L2_FIELD_INTERLACED) {
+        av_log(ctx, AV_LOG_DEBUG,
+               "The V4L2 driver is using the interlaced mode\n");
+        s->interlaced = 1;
+    }
+
    return res;
 }

@ -264,6 +295,71 @@ static enum CodecID fmt_v4l2codec(uint32_t v4l2_fmt)
    return CODEC_ID_NONE;
 }

+#if HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE
+/**
+ * Log, on the current line, every frame size the device enumerates for
+ * one pixel format (VIDIOC_ENUM_FRAMESIZES).
+ *
+ * Discrete sizes are printed as " WxH"; continuous and stepwise ranges as
+ * " {min-max, step}x{min-max, step}". No newline is emitted.
+ *
+ * @param ctx         logging context
+ * @param fd          descriptor the ioctl is issued on
+ * @param pixelformat V4L2 pixel format to enumerate sizes for
+ */
+static void list_framesizes(AVFormatContext *ctx, int fd, uint32_t pixelformat)
+{
+    struct v4l2_frmsizeenum vfse = { .pixel_format = pixelformat };
+
+    /* Use the v4l2_ioctl wrapper like every other ioctl in this file. */
+    while(!v4l2_ioctl(fd, VIDIOC_ENUM_FRAMESIZES, &vfse)) {
+        switch (vfse.type) {
+        case V4L2_FRMSIZE_TYPE_DISCRETE:
+            av_log(ctx, AV_LOG_INFO, " %ux%u",
+                   vfse.discrete.width, vfse.discrete.height);
+            break;
+        case V4L2_FRMSIZE_TYPE_CONTINUOUS:
+        case V4L2_FRMSIZE_TYPE_STEPWISE:
+            av_log(ctx, AV_LOG_INFO, " {%u-%u, %u}x{%u-%u, %u}",
+                   vfse.stepwise.min_width,
+                   vfse.stepwise.max_width,
+                   vfse.stepwise.step_width,
+                   vfse.stepwise.min_height,
+                   vfse.stepwise.max_height,
+                   vfse.stepwise.step_height);
+            break;
+        }
+        vfse.index++;
+    }
+}
+#endif
+
+/**
+ * Log one line per capture format the device enumerates (VIDIOC_ENUM_FMT).
+ *
+ * Raw formats are printed with an "R" tag and the pixel format name,
+ * compressed ones with a "C" tag and the codec name; either name falls back
+ * to "Unsupported" when no match is found.
+ *
+ * @param ctx  logging context
+ * @param fd   descriptor the ioctls are issued on
+ * @param type bitmask of V4L_RAWFORMATS / V4L_COMPFORMATS selecting which
+ *             kinds of formats are printed
+ */
+static void list_formats(AVFormatContext *ctx, int fd, int type)
+{
+    struct v4l2_fmtdesc vfd = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE };
+
+    /* Use the v4l2_ioctl wrapper like every other ioctl in this file. */
+    while(!v4l2_ioctl(fd, VIDIOC_ENUM_FMT, &vfd)) {
+        enum CodecID codec_id = fmt_v4l2codec(vfd.pixelformat);
+        enum PixelFormat pix_fmt = fmt_v4l2ff(vfd.pixelformat, codec_id);
+
+        /* Advance first so the "continue" paths below cannot loop forever. */
+        vfd.index++;
+
+        if (!(vfd.flags & V4L2_FMT_FLAG_COMPRESSED) &&
+            type & V4L_RAWFORMATS) {
+            const char *fmt_name = av_get_pix_fmt_name(pix_fmt);
+            av_log(ctx, AV_LOG_INFO, "R : %9s : %20s :",
+                   fmt_name ? fmt_name : "Unsupported",
+                   vfd.description);
+        } else if (vfd.flags & V4L2_FMT_FLAG_COMPRESSED &&
+                   type & V4L_COMPFORMATS) {
+            /* NOTE(review): a capture device presumably needs a decoder
+             * lookup here rather than an encoder one -- confirm. */
+            AVCodec *codec = avcodec_find_encoder(codec_id);
+            av_log(ctx, AV_LOG_INFO, "C : %9s : %20s :",
+                   codec ? codec->name : "Unsupported",
+                   vfd.description);
+        } else {
+            continue;
+        }
+
+#ifdef V4L2_FMT_FLAG_EMULATED
+        if (vfd.flags & V4L2_FMT_FLAG_EMULATED) {
+            av_log(ctx, AV_LOG_WARNING, "%s", "Emulated");
+            /* Terminate the line: the continue skips the "\n" logged below. */
+            av_log(ctx, AV_LOG_INFO, "\n");
+            continue;
+        }
+#endif
+#if HAVE_STRUCT_V4L2_FRMIVALENUM_DISCRETE
+        list_framesizes(ctx, fd, vfd.pixelformat);
+#endif
+        av_log(ctx, AV_LOG_INFO, "\n");
+    }
+}
+
 static int mmap_init(AVFormatContext *ctx)
 {
    struct video_data *s = ctx->priv_data;
@ -314,12 +410,16 @@ static int mmap_init(AVFormatContext *ctx)

        s->buf_len[i] = buf.length;
        if (s->frame_size > 0 && s->buf_len[i] < s->frame_size) {
-            av_log(ctx, AV_LOG_ERROR, "Buffer len [%d] = %d != %d\n", i, s->buf_len[i], s->frame_size);
+            av_log(ctx, AV_LOG_ERROR,
+                   "Buffer len [%d] = %d != %d\n",
+                   i, s->buf_len[i], s->frame_size);

            return -1;
        }
        s->buf_start[i] = v4l2_mmap(NULL, buf.length,
-                        PROT_READ | PROT_WRITE, MAP_SHARED, s->fd, buf.m.offset);
+                               PROT_READ | PROT_WRITE, MAP_SHARED,
+                               s->fd, buf.m.offset);
+
        if (s->buf_start[i] == MAP_FAILED) {
            av_log(ctx, AV_LOG_ERROR, "mmap: %s\n", strerror(errno));
            return AVERROR(errno);
@ -329,20 +429,14 @@ static int mmap_init(AVFormatContext *ctx)
    return 0;
 }

-static int read_init(AVFormatContext *ctx)
-{
-    return -1;
-}
-
 static void mmap_release_buffer(AVPacket *pkt)
 {
    struct v4l2_buffer buf = {0};
    int res, fd;
    struct buff_data *buf_descriptor = pkt->priv;

-    if (pkt->data == NULL) {
-         return;
-    }
+    if (pkt->data == NULL)
+        return;

    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
@ -351,9 +445,10 @@ static void mmap_release_buffer(AVPacket *pkt)
    av_free(buf_descriptor);

    res = v4l2_ioctl(fd, VIDIOC_QBUF, &buf);
-    if (res < 0) {
-        av_log(NULL, AV_LOG_ERROR, "ioctl(VIDIOC_QBUF): %s\n", strerror(errno));
-    }
+    if (res < 0)
+        av_log(NULL, AV_LOG_ERROR, "ioctl(VIDIOC_QBUF): %s\n",
+               strerror(errno));
+
    pkt->data = NULL;
    pkt->size = 0;
 }
@ -375,13 +470,17 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
            pkt->size = 0;
            return AVERROR(EAGAIN);
        }
-        av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_DQBUF): %s\n", strerror(errno));
+        av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_DQBUF): %s\n",
+               strerror(errno));

        return AVERROR(errno);
    }
    assert(buf.index < s->buffers);
    if (s->frame_size > 0 && buf.bytesused != s->frame_size) {
-        av_log(ctx, AV_LOG_ERROR, "The v4l2 frame is %d bytes, but %d bytes are expected\n", buf.bytesused, s->frame_size);
+        av_log(ctx, AV_LOG_ERROR,
+               "The v4l2 frame is %d bytes, but %d bytes are expected\n",
+               buf.bytesused, s->frame_size);
+
        return AVERROR_INVALIDDATA;
    }

@ -407,11 +506,6 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
    return s->buf_len[buf.index];
 }

-static int read_frame(AVFormatContext *ctx, AVPacket *pkt)
-{
-    return -1;
-}
-
 static int mmap_start(AVFormatContext *ctx)
 {
    struct video_data *s = ctx->priv_data;
@ -427,7 +521,9 @@ static int mmap_start(AVFormatContext *ctx)

        res = v4l2_ioctl(s->fd, VIDIOC_QBUF, &buf);
        if (res < 0) {
-            av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QBUF): %s\n", strerror(errno));
+            av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_QBUF): %s\n",
+                   strerror(errno));
+
            return AVERROR(errno);
        }
    }
@ -435,7 +531,9 @@ static int mmap_start(AVFormatContext *ctx)
    type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    res = v4l2_ioctl(s->fd, VIDIOC_STREAMON, &type);
    if (res < 0) {
-        av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_STREAMON): %s\n", strerror(errno));
+        av_log(ctx, AV_LOG_ERROR, "ioctl(VIDIOC_STREAMON): %s\n",
+               strerror(errno));
+
        return AVERROR(errno);
    }

@ -471,8 +569,10 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)

    streamparm.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;

-    if (s->framerate && (ret = av_parse_video_rate(&framerate_q, s->framerate)) < 0) {
-        av_log(s1, AV_LOG_ERROR, "Could not parse framerate '%s'.\n", s->framerate);
+    if (s->framerate &&
+        (ret = av_parse_video_rate(&framerate_q, s->framerate)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Could not parse framerate '%s'.\n",
+               s->framerate);
        return ret;
    }

@ -486,7 +586,8 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
    av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set input_id: %d, input: %s\n",
            s->channel, input.name);
    if (v4l2_ioctl(s->fd, VIDIOC_S_INPUT, &input.index) < 0) {
-        av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set input(%d) failed\n",
+        av_log(s1, AV_LOG_ERROR,
+               "The V4L2 driver ioctl set input(%d) failed\n",
                s->channel);
        return AVERROR(EIO);
    }
@ -506,10 +607,12 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
            return ret;
        }

-        av_log(s1, AV_LOG_DEBUG, "The V4L2 driver set standard: %s, id: %"PRIu64"\n",
+        av_log(s1, AV_LOG_DEBUG,
+               "The V4L2 driver set standard: %s, id: %"PRIu64"\n",
               s->standard, (uint64_t)standard.id);
        if (v4l2_ioctl(s->fd, VIDIOC_S_STD, &standard.id) < 0) {
-            av_log(s1, AV_LOG_ERROR, "The V4L2 driver ioctl set standard(%s) failed\n",
+            av_log(s1, AV_LOG_ERROR,
+                   "The V4L2 driver ioctl set standard(%s) failed\n",
                   s->standard);
            return AVERROR(EIO);
        }
@ -520,6 +623,7 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
               framerate_q.den, framerate_q.num);
        tpf->numerator   = framerate_q.den;
        tpf->denominator = framerate_q.num;
+
        if (v4l2_ioctl(s->fd, VIDIOC_S_PARM, &streamparm) != 0) {
            av_log(s1, AV_LOG_ERROR,
                   "ioctl set time per frame(%d/%d) failed\n",
@ -530,14 +634,15 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
        if (framerate_q.num != tpf->denominator ||
            framerate_q.den != tpf->numerator) {
            av_log(s1, AV_LOG_INFO,
-                   "The driver changed the time per frame from %d/%d to %d/%d\n",
+                   "The driver changed the time per frame from "
+                   "%d/%d to %d/%d\n",
                   framerate_q.den, framerate_q.num,
                   tpf->numerator, tpf->denominator);
        }
    } else {
-        /* if timebase value is not set, read the timebase value from the driver */
        if (v4l2_ioctl(s->fd, VIDIOC_G_PARM, &streamparm) != 0) {
-            av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_PARM): %s\n", strerror(errno));
+            av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_PARM): %s\n",
+                   strerror(errno));
            return AVERROR(errno);
        }
    }
@ -571,6 +676,7 @@ static uint32_t device_try_init(AVFormatContext *s1,
            }
        }
    }
+
    if (desired_format != 0) {
        *codec_id = fmt_v4l2codec(desired_format);
        assert(*codec_id != CODEC_ID_NONE);
@ -584,7 +690,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
    struct video_data *s = s1->priv_data;
    AVStream *st;
    int res = 0;
-    uint32_t desired_format, capabilities;
+    uint32_t desired_format;
    enum CodecID codec_id;
    enum PixelFormat pix_fmt = PIX_FMT_NONE;

@ -593,42 +699,62 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
        res = AVERROR(ENOMEM);
        goto out;
    }
-    avpriv_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */

-    if (s->video_size && (res = av_parse_video_size(&s->width, &s->height, s->video_size)) < 0) {
-        av_log(s1, AV_LOG_ERROR, "Could not parse video size '%s'.\n", s->video_size);
+    s->fd = device_open(s1);
+    if (s->fd < 0) {
+        res = s->fd;
        goto out;
    }
-    if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) {
-        av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format);
-        res = AVERROR(EINVAL);
+
+    if (s->list_format) {
+        list_formats(s1, s->fd, s->list_format);
+        res = AVERROR_EXIT;
        goto out;
    }

-    capabilities = 0;
-    s->fd = device_open(s1, &capabilities);
-    if (s->fd < 0) {
-        res = AVERROR(EIO);
+    avpriv_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */
+
+    if (s->video_size &&
+        (res = av_parse_video_size(&s->width, &s->height, s->video_size)) < 0) {
+        av_log(s1, AV_LOG_ERROR, "Could not parse video size '%s'.\n",
+               s->video_size);
        goto out;
    }
-    av_log(s1, AV_LOG_VERBOSE, "[%d]Capabilities: %x\n", s->fd, capabilities);
+
+    if (s->pixel_format) {
+
+        pix_fmt = av_get_pix_fmt(s->pixel_format);
+
+        if (pix_fmt == PIX_FMT_NONE) {
+            av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n",
+                   s->pixel_format);
+
+            res = AVERROR(EINVAL);
+            goto out;
+        }
+    }

    if (!s->width && !s->height) {
        struct v4l2_format fmt;

-        av_log(s1, AV_LOG_VERBOSE, "Querying the device for the current frame size\n");
+        av_log(s1, AV_LOG_VERBOSE,
+               "Querying the device for the current frame size\n");
        fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        if (v4l2_ioctl(s->fd, VIDIOC_G_FMT, &fmt) < 0) {
-            av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_FMT): %s\n", strerror(errno));
+            av_log(s1, AV_LOG_ERROR, "ioctl(VIDIOC_G_FMT): %s\n",
+                   strerror(errno));
            res = AVERROR(errno);
            goto out;
        }
+
        s->width  = fmt.fmt.pix.width;
        s->height = fmt.fmt.pix.height;
-        av_log(s1, AV_LOG_VERBOSE, "Setting frame size to %dx%d\n", s->width, s->height);
+        av_log(s1, AV_LOG_VERBOSE,
+               "Setting frame size to %dx%d\n", s->width, s->height);
    }

-    desired_format = device_try_init(s1, pix_fmt, &s->width, &s->height, &codec_id);
+    desired_format = device_try_init(s1, pix_fmt, &s->width, &s->height,
+                                     &codec_id);
    if (desired_format == 0) {
        av_log(s1, AV_LOG_ERROR, "Cannot find a proper format for "
               "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, pix_fmt);
@ -639,32 +765,29 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
    }
    if ((res = av_image_check_size(s->width, s->height, 0, s1)) < 0)
        goto out;
+
    s->frame_format = desired_format;

    if ((res = v4l2_set_parameters(s1, ap)) < 0)
        goto out;

    st->codec->pix_fmt = fmt_v4l2ff(desired_format, codec_id);
-    s->frame_size = avpicture_get_size(st->codec->pix_fmt, s->width, s->height);
-    if (capabilities & V4L2_CAP_STREAMING) {
-        s->io_method = io_mmap;
-        res = mmap_init(s1);
-        if (res == 0) {
-            res = mmap_start(s1);
-        }
-    } else {
-        s->io_method = io_read;
-        res = read_init(s1);
-    }
-    if (res < 0) {
+    s->frame_size =
+        avpicture_get_size(st->codec->pix_fmt, s->width, s->height);
+
+    if ((res = mmap_init(s1)) ||
+        (res = mmap_start(s1)) < 0) {
        v4l2_close(s->fd);
-        res = AVERROR(EIO);
        goto out;
    }
+
    s->top_field_first = first_field(s->fd);

    st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
    st->codec->codec_id = codec_id;
+    if (codec_id == CODEC_ID_RAWVIDEO)
+        st->codec->codec_tag =
+            avcodec_pix_fmt_to_codec_tag(st->codec->pix_fmt);
    st->codec->width = s->width;
    st->codec->height = s->height;
    st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8;
@ -676,26 +799,17 @@ out:
 static int v4l2_read_packet(AVFormatContext *s1, AVPacket *pkt)
 {
    struct video_data *s = s1->priv_data;
+    AVFrame *frame = s1->streams[0]->codec->coded_frame;
    int res;

-    if (s->io_method == io_mmap) {
-        av_init_packet(pkt);
-        res = mmap_read_frame(s1, pkt);
-    } else if (s->io_method == io_read) {
-        if (av_new_packet(pkt, s->frame_size) < 0)
-            return AVERROR(EIO);
-
-        res = read_frame(s1, pkt);
-    } else {
-        return AVERROR(EIO);
-    }
-    if (res < 0) {
+    av_init_packet(pkt);
+    if ((res = mmap_read_frame(s1, pkt)) < 0) {
        return res;
    }

-    if (s1->streams[0]->codec->coded_frame) {
-        s1->streams[0]->codec->coded_frame->interlaced_frame = 1;
-        s1->streams[0]->codec->coded_frame->top_field_first = s->top_field_first;
+    if (frame && s->interlaced) {
+        frame->interlaced_frame = 1;
+        frame->top_field_first = s->top_field_first;
    }

    return pkt->size;
@ -705,9 +819,7 @@ static int v4l2_read_close(AVFormatContext *s1)
 {
    struct video_data *s = s1->priv_data;

-    if (s->io_method == io_mmap) {
-        mmap_close(s);
-    }
+    mmap_close(s);

    v4l2_close(s->fd);
    return 0;
@ -717,11 +829,15 @@ static int v4l2_read_close(AVFormatContext *s1)
 #define DEC AV_OPT_FLAG_DECODING_PARAM

 static const AVOption options[] = {
-    { "standard", "", OFFSET(standard), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
-    { "channel",  "", OFFSET(channel),  AV_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
-    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
-    { "pixel_format", "", OFFSET(pixel_format), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
-    { "framerate", "", OFFSET(framerate), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "standard",     "TV standard, used only by analog frame grabber",            OFFSET(standard),     AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0,       DEC },
+    { "channel",      "TV channel, used only by frame grabber",                    OFFSET(channel),      AV_OPT_TYPE_INT,    {.dbl = 0 },    0, INT_MAX, DEC },
+    { "video_size",   "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size),   AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,       DEC },
+    { "pixel_format", "",                                                          OFFSET(pixel_format), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,       DEC },
+    { "framerate",    "",                                                          OFFSET(framerate),    AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,       DEC },
+    { "list_formats", "List available formats and exit",                           OFFSET(list_format),  AV_OPT_TYPE_INT,    {.dbl = 0 },  0, INT_MAX, DEC, "list_formats" },
+    { "all",          "Show all available formats",                                OFFSET(list_format),  AV_OPT_TYPE_CONST,  {.dbl = V4L_ALLFORMATS  },    0, INT_MAX, DEC, "list_formats" },
+    { "raw",          "Show only non-compressed formats",                          OFFSET(list_format),  AV_OPT_TYPE_CONST,  {.dbl = V4L_RAWFORMATS  },    0, INT_MAX, DEC, "list_formats" },
+    { "compressed",   "Show only compressed formats",                              OFFSET(list_format),  AV_OPT_TYPE_CONST,  {.dbl = V4L_COMPFORMATS },    0, INT_MAX, DEC, "list_formats" },
    { NULL },
 };

--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@ -26,6 +26,7 @@ OBJS-$(CONFIG_AC3_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_ACT_DEMUXER)               += act.o
 OBJS-$(CONFIG_ADF_DEMUXER)               += bintext.o sauce.o
 OBJS-$(CONFIG_ADX_DEMUXER)               += adxdec.o
+OBJS-$(CONFIG_ADX_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_ADTS_MUXER)                += adtsenc.o
 OBJS-$(CONFIG_AEA_DEMUXER)               += aea.o pcm.o
 OBJS-$(CONFIG_AIFF_DEMUXER)              += aiffdec.o riff.o pcm.o isom.o
--- a/libavformat/adxdec.c
+++ b/libavformat/adxdec.c
@ -109,4 +109,5 @@ AVInputFormat ff_adx_demuxer = {
    .read_packet    = adx_read_packet,
    .extensions     = "adx",
    .value          = CODEC_ID_ADPCM_ADX,
+    .flags          = AVFMT_GENERIC_INDEX,
 };
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@ -54,7 +54,7 @@ void av_register_all(void)
    REGISTER_DEMUXER  (ACT, act);
    REGISTER_DEMUXER  (ADF, adf);
    REGISTER_MUXER    (ADTS, adts);
-    REGISTER_DEMUXER  (ADX, adx);
+    REGISTER_MUXDEMUX (ADX, adx);
    REGISTER_DEMUXER  (AEA, aea);
    REGISTER_MUXDEMUX (AIFF, aiff);
    REGISTER_MUXDEMUX (AMR, amr);
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@ -574,6 +574,10 @@ static void fill_buffer(AVIOContext *s)
    int len= s->buffer_size - (dst - s->buffer);
    int max_buffer_size = s->max_packet_size ? s->max_packet_size : IO_BUFFER_SIZE;

+    /* can't fill the buffer without read_packet, just set EOF if appropriate */
+    if (!s->read_packet && s->buf_ptr >= s->buf_end)
+        s->eof_reached = 1;
+
    /* no need to do anything if EOF already reached */
    if (s->eof_reached)
        return;
--- a/libavformat/mtv.c
+++ b/libavformat/mtv.c
@ -112,10 +112,12 @@ static int mtv_read_header(AVFormatContext *s, AVFormatParameters *ap)

    avio_skip(pb, 4);
    audio_subsegments = avio_rl16(pb);
-    if(!audio_subsegments){
-        av_log(s, AV_LOG_ERROR, "audio_subsegments is 0\n");
-        return AVERROR(EINVAL);
+
+    if (audio_subsegments == 0) {
+        av_log_ask_for_sample(s, "MTV files without audio are not supported\n");
+        return AVERROR_INVALIDDATA;
    }
+
    mtv->full_segment_size =
        audio_subsegments * (MTV_AUDIO_PADDING_SIZE + MTV_ASUBCHUNK_DATA_SIZE) +
        mtv->img_segment_size;
--- a/libavformat/rawenc.c
+++ b/libavformat/rawenc.c
@ -45,6 +45,18 @@ AVOutputFormat ff_ac3_muxer = {
 };
 #endif

+#if CONFIG_ADX_MUXER
+AVOutputFormat ff_adx_muxer = {
+    .name              = "adx",
+    .long_name         = NULL_IF_CONFIG_SMALL("CRI ADX"),
+    .extensions        = "adx",
+    .audio_codec       = CODEC_ID_ADPCM_ADX,
+    .video_codec       = CODEC_ID_NONE,
+    .write_packet      = ff_raw_write_packet,
+    .flags             = AVFMT_NOTIMESTAMPS,
+};
+#endif
+
 #if CONFIG_DIRAC_MUXER
 AVOutputFormat ff_dirac_muxer = {
    .name              = "dirac",
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@ -2470,7 +2470,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
    int64_t dc_mask, eq_mask, both_masks;
    int64_t sums[10*8*2];
    src+= step*3; // src points to begin of the 8x8 Block
-//START_TIMER
+    //{ START_TIMER
    __asm__ volatile(
        "movq %0, %%mm7                         \n\t"
        "movq %1, %%mm6                         \n\t"
@ -2995,7 +2995,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
    STOP_TIMER("step16")
 }else{
    STOP_TIMER("stepX")
-}*/
+}
+    } */
 }
 #endif //HAVE_MMX

--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@ -19,7 +19,8 @@ OBJS-$(HAVE_MMX)           +=  x86/rgb2rgb.o            \
                               x86/swscale_mmx.o        \
                               x86/yuv2rgb_mmx.o
 OBJS-$(HAVE_VIS)           +=  sparc/yuv2rgb_vis.o
-OBJS-$(HAVE_YASM)          +=  x86/scale.o
+MMX-OBJS-$(HAVE_YASM)      +=  x86/output.o             \
+                               x86/scale.o

 $(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)

--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@ -21,8 +21,8 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

-#ifndef PPC_YUV2RGB_ALTIVEC_H
-#define PPC_YUV2RGB_ALTIVEC_H 1
+#ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H
+#define SWSCALE_PPC_YUV2RGB_ALTIVEC_H

 #define YUV2PACKEDX_HEADER(suffix) \
 void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
@ -39,4 +39,4 @@ YUV2PACKEDX_HEADER(rgba);
 YUV2PACKEDX_HEADER(rgb24);
 YUV2PACKEDX_HEADER(bgr24);

-#endif /* PPC_YUV2RGB_ALTIVEC_H */
+#endif /* SWSCALE_PPC_YUV2RGB_ALTIVEC_H */
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@ -18,39 +18,6 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

-/*
-  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
-  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
-  {BGR,RGB}{1,4,8,15,16} support dithering
-
-  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
-  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
-  x -> x
-  YUV9 -> YV12
-  YUV9/YV12 -> Y800
-  Y800 -> YUV9/YV12
-  BGR24 -> BGR32 & RGB24 -> RGB32
-  BGR32 -> BGR24 & RGB32 -> RGB24
-  BGR15 -> BGR16
-*/
-
-/*
-tested special converters (most are tested actually, but I did not write it down ...)
- YV12 -> BGR12/BGR16
- YV12 -> YV12
- BGR15 -> BGR16
- BGR16 -> BGR16
- YVU9 -> YV12
-
-untested special converters
-  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
-  YV12/I420 -> YV12/I420
-  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
-  BGR24 -> BGR32 & RGB24 -> RGB32
-  BGR32 -> BGR24 & RGB32 -> RGB24
-  BGR24 -> YV12
-*/
-
 #include <inttypes.h>
 #include <string.h>
 #include <math.h>
@ -2371,36 +2338,6 @@ find_c_packed_planar_out_funcs(SwsContext *c,
    } else {
        YUV_PACKED:
        switch (dstFormat) {
-        case PIX_FMT_GRAY16BE:
-            *yuv2packed1 = yuv2gray16BE_1_c;
-            *yuv2packed2 = yuv2gray16BE_2_c;
-            *yuv2packedX = yuv2gray16BE_X_c;
-            break;
-        case PIX_FMT_GRAY16LE:
-            *yuv2packed1 = yuv2gray16LE_1_c;
-            *yuv2packed2 = yuv2gray16LE_2_c;
-            *yuv2packedX = yuv2gray16LE_X_c;
-            break;
-        case PIX_FMT_MONOWHITE:
-            *yuv2packed1 = yuv2monowhite_1_c;
-            *yuv2packed2 = yuv2monowhite_2_c;
-            *yuv2packedX = yuv2monowhite_X_c;
-            break;
-        case PIX_FMT_MONOBLACK:
-            *yuv2packed1 = yuv2monoblack_1_c;
-            *yuv2packed2 = yuv2monoblack_2_c;
-            *yuv2packedX = yuv2monoblack_X_c;
-            break;
-        case PIX_FMT_YUYV422:
-            *yuv2packed1 = yuv2yuyv422_1_c;
-            *yuv2packed2 = yuv2yuyv422_2_c;
-            *yuv2packedX = yuv2yuyv422_X_c;
-            break;
-        case PIX_FMT_UYVY422:
-            *yuv2packed1 = yuv2uyvy422_1_c;
-            *yuv2packed2 = yuv2uyvy422_2_c;
-            *yuv2packedX = yuv2uyvy422_X_c;
-            break;
        case PIX_FMT_RGB48LE:
            *yuv2packed1 = yuv2rgb48le_1_c;
            *yuv2packed2 = yuv2rgb48le_2_c;
@ -2517,6 +2454,38 @@ find_c_packed_planar_out_funcs(SwsContext *c,
            break;
        }
    }
+    switch (dstFormat) {
+    case PIX_FMT_GRAY16BE:
+        *yuv2packed1 = yuv2gray16BE_1_c;
+        *yuv2packed2 = yuv2gray16BE_2_c;
+        *yuv2packedX = yuv2gray16BE_X_c;
+        break;
+    case PIX_FMT_GRAY16LE:
+        *yuv2packed1 = yuv2gray16LE_1_c;
+        *yuv2packed2 = yuv2gray16LE_2_c;
+        *yuv2packedX = yuv2gray16LE_X_c;
+        break;
+    case PIX_FMT_MONOWHITE:
+        *yuv2packed1 = yuv2monowhite_1_c;
+        *yuv2packed2 = yuv2monowhite_2_c;
+        *yuv2packedX = yuv2monowhite_X_c;
+        break;
+    case PIX_FMT_MONOBLACK:
+        *yuv2packed1 = yuv2monoblack_1_c;
+        *yuv2packed2 = yuv2monoblack_2_c;
+        *yuv2packedX = yuv2monoblack_X_c;
+        break;
+    case PIX_FMT_YUYV422:
+        *yuv2packed1 = yuv2yuyv422_1_c;
+        *yuv2packed2 = yuv2yuyv422_2_c;
+        *yuv2packedX = yuv2yuyv422_X_c;
+        break;
+    case PIX_FMT_UYVY422:
+        *yuv2packed1 = yuv2uyvy422_1_c;
+        *yuv2packed2 = yuv2uyvy422_2_c;
+        *yuv2packedX = yuv2uyvy422_X_c;
+        break;
+    }
 }

 #define DEBUG_SWSCALE_BUFFERS 0
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@ -0,0 +1,409 @@
+;******************************************************************************
+;* x86-optimized vertical line scaling functions
+;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
+;*                    Kieran Kunhya <kieran@kunhya.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+
+minshort:      times 8 dw 0x8000
+yuv2yuvX_16_start:  times 4 dd 0x4000 - 0x40000000
+yuv2yuvX_10_start:  times 4 dd 0x10000
+yuv2yuvX_9_start:   times 4 dd 0x20000
+yuv2yuvX_10_upper:  times 8 dw 0x3ff
+yuv2yuvX_9_upper:   times 8 dw 0x1ff
+pd_4:          times 4 dd 4
+pd_4min0x40000:times 4 dd 4 - (0x40000)
+pw_16:         times 8 dw 16
+pw_32:         times 8 dw 32
+pw_512:        times 8 dw 512
+pw_1024:       times 8 dw 1024
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; vertical line scaling
+;
+; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW,
+;                                     const uint8_t *dither, int offset)
+; and
+; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize,
+;                                     const int16_t **src, uint8_t *dst, int dstW,
+;                                     const uint8_t *dither, int offset)
+;
+; Scale one or $filterSize lines of source data to generate one line of output
+; data. The input is 15-bit in int16_t if $output_size is [8,10] and 19-bit in
+; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
+; of 2. $offset is either 0 or 3. $dither holds 8 values.
+;-----------------------------------------------------------------------------
+
+%macro yuv2planeX_fn 4
+
+%ifdef ARCH_X86_32
+%define cntr_reg r1
+%define movsx mov
+%else
+%define cntr_reg r11
+%define movsx movsxd
+%endif
+
+cglobal yuv2planeX_%2_%1, %4, 7, %3
+%if %2 == 8 || %2 == 9 || %2 == 10
+    pxor            m6,  m6
+%endif ; %2 == 8/9/10
+
+%if %2 == 8
+%ifdef ARCH_X86_32
+%assign pad 0x2c - (stack_offset & 15)
+    SUB             rsp, pad
+%define m_dith m7
+%else ; x86-64
+%define m_dith m9
+%endif ; x86-32
+
+    ; create registers holding dither
+    movq        m_dith, [r5]             ; dither
+    test            r6d, r6d
+    jz              .no_rot
+%if mmsize == 16
+    punpcklqdq  m_dith,  m_dith
+%endif ; mmsize == 16
+    PALIGNR     m_dith,  m_dith,  3,  m0
+.no_rot:
+%if mmsize == 16
+    punpcklbw   m_dith,  m6
+%ifdef ARCH_X86_64
+    punpcklwd       m8,  m_dith,  m6
+    pslld           m8,  12
+%else ; x86-32
+    punpcklwd       m5,  m_dith,  m6
+    pslld           m5,  12
+%endif ; x86-32/64
+    punpckhwd   m_dith,  m6
+    pslld       m_dith,  12
+%ifdef ARCH_X86_32
+    mova      [rsp+ 0],  m5
+    mova      [rsp+16],  m_dith
+%endif
+%else ; mmsize == 8
+    punpcklbw       m5,  m_dith,  m6
+    punpckhbw   m_dith,  m6
+    punpcklwd       m4,  m5,  m6
+    punpckhwd       m5,  m6
+    punpcklwd       m3,  m_dith,  m6
+    punpckhwd   m_dith,  m6
+    pslld           m4,  12
+    pslld           m5,  12
+    pslld           m3,  12
+    pslld       m_dith,  12
+    mova      [rsp+ 0],  m4
+    mova      [rsp+ 8],  m5
+    mova      [rsp+16],  m3
+    mova      [rsp+24],  m_dith
+%endif ; mmsize == 8/16
+%endif ; %2 == 8
+
+    xor             r5,  r5
+
+.pixelloop:
+%assign %%i 0
+    ; the rep here is for the 8bit output mmx case, where dither covers
+    ; 8 pixels but we can only handle 2 pixels per register, and thus 4
+    ; pixels per iteration. In order to not have to keep track of where
+    ; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
+%if %2 == 8
+%rep 16/mmsize
+%endif ; %2 == 8
+
+%if %2 == 8
+%ifdef ARCH_X86_32
+    mova            m2, [rsp+mmsize*(0+%%i)]
+    mova            m1, [rsp+mmsize*(1+%%i)]
+%else ; x86-64
+    mova            m2,  m8
+    mova            m1,  m_dith
+%endif ; x86-32/64
+%else ; %2 == 9/10/16
+    mova            m1, [yuv2yuvX_%2_start]
+    mova            m2,  m1
+%endif ; %2 == 8/9/10/16
+    movsx     cntr_reg,  r1m
+.filterloop_ %+ %%i:
+    ; input pixels
+    mov             r6, [r2+gprsize*cntr_reg-2*gprsize]
+%if %2 == 16
+    mova            m3, [r6+r5*4]
+    mova            m5, [r6+r5*4+mmsize]
+%else ; %2 == 8/9/10
+    mova            m3, [r6+r5*2]
+%endif ; %2 == 8/9/10/16
+    mov             r6, [r2+gprsize*cntr_reg-gprsize]
+%if %2 == 16
+    mova            m4, [r6+r5*4]
+    mova            m6, [r6+r5*4+mmsize]
+%else ; %2 == 8/9/10
+    mova            m4, [r6+r5*2]
+%endif ; %2 == 8/9/10/16
+
+    ; coefficients
+    movd            m0, [r0+2*cntr_reg-4]; coeff[0], coeff[1]
+%if %2 == 16
+    pshuflw         m7,  m0,  0          ; coeff[0]
+    pshuflw         m0,  m0,  0x55       ; coeff[1]
+    pmovsxwd        m7,  m7              ; word -> dword
+    pmovsxwd        m0,  m0              ; word -> dword
+
+    pmulld          m3,  m7
+    pmulld          m5,  m7
+    pmulld          m4,  m0
+    pmulld          m6,  m0
+
+    paddd           m2,  m3
+    paddd           m1,  m5
+    paddd           m2,  m4
+    paddd           m1,  m6
+%else ; %2 == 10/9/8
+    punpcklwd       m5,  m3,  m4
+    punpckhwd       m3,  m4
+    SPLATD          m0,  m0
+
+    pmaddwd         m5,  m0
+    pmaddwd         m3,  m0
+
+    paddd           m2,  m5
+    paddd           m1,  m3
+%endif ; %2 == 8/9/10/16
+
+    sub       cntr_reg,  2
+    jg .filterloop_ %+ %%i
+
+%if %2 == 16
+    psrad           m2,  31 - %2
+    psrad           m1,  31 - %2
+%else ; %2 == 10/9/8
+    psrad           m2,  27 - %2
+    psrad           m1,  27 - %2
+%endif ; %2 == 8/9/10/16
+
+%if %2 == 8
+    packssdw        m2,  m1
+    packuswb        m2,  m2
+    movh     [r3+r5*1],  m2
+%else ; %2 == 9/10/16
+%if %2 == 16
+    packssdw        m2,  m1
+    paddw           m2, [minshort]
+%else ; %2 == 9/10
+%ifidn %1, sse4
+    packusdw        m2,  m1
+%elifidn %1, avx
+    packusdw        m2,  m1
+%else ; mmx2/sse2
+    packssdw        m2,  m1
+    pmaxsw          m2,  m6
+%endif ; mmx2/sse2/sse4/avx
+    pminsw          m2, [yuv2yuvX_%2_upper]
+%endif ; %2 == 9/10/16
+    mova     [r3+r5*2],  m2
+%endif ; %2 == 8/9/10/16
+
+    add             r5,  mmsize/2
+    sub             r4d, mmsize/2
+%if %2 == 8
+%assign %%i %%i+2
+%endrep
+%endif ; %2 == 8
+    jg .pixelloop
+
+%if %2 == 8
+%ifdef ARCH_X86_32
+    ADD             rsp, pad
+    RET
+%else ; x86-64
+    REP_RET
+%endif ; x86-32/64
+%else ; %2 == 9/10/16
+    REP_RET
+%endif ; %2 == 8/9/10/16
+%endmacro
+
+%define PALIGNR PALIGNR_MMX
+%ifdef ARCH_X86_32
+INIT_MMX
+yuv2planeX_fn mmx2,  8,  0, 7
+yuv2planeX_fn mmx2,  9,  0, 5
+yuv2planeX_fn mmx2, 10,  0, 5
+%endif
+
+INIT_XMM
+yuv2planeX_fn sse2,  8, 10, 7
+yuv2planeX_fn sse2,  9,  7, 5
+yuv2planeX_fn sse2, 10,  7, 5
+
+%define PALIGNR PALIGNR_SSSE3
+yuv2planeX_fn sse4,  8, 10, 7
+yuv2planeX_fn sse4,  9,  7, 5
+yuv2planeX_fn sse4, 10,  7, 5
+yuv2planeX_fn sse4, 16,  8, 5
+
+INIT_AVX
+yuv2planeX_fn avx,   8, 10, 7
+yuv2planeX_fn avx,   9,  7, 5
+yuv2planeX_fn avx,  10,  7, 5
+
+; %1=output-bpc, %2=alignment (u/a)
+%macro yuv2plane1_mainloop 2
+.loop_%2:
+%if %1 == 8
+    paddsw          m0, m2, [r0+r2*2+mmsize*0]
+    paddsw          m1, m3, [r0+r2*2+mmsize*1]
+    psraw           m0, 7
+    psraw           m1, 7
+    packuswb        m0, m1
+    mov%2      [r1+r2], m0
+%elif %1 == 16
+    paddd           m0, m4, [r0+r2*4+mmsize*0]
+    paddd           m1, m4, [r0+r2*4+mmsize*1]
+    paddd           m2, m4, [r0+r2*4+mmsize*2]
+    paddd           m3, m4, [r0+r2*4+mmsize*3]
+    psrad           m0, 3
+    psrad           m1, 3
+    psrad           m2, 3
+    psrad           m3, 3
+%if cpuflag(sse4) ; avx/sse4
+    packusdw        m0, m1
+    packusdw        m2, m3
+%else ; mmx/sse2
+    packssdw        m0, m1
+    packssdw        m2, m3
+    paddw           m0, m5
+    paddw           m2, m5
+%endif ; mmx/sse2/sse4/avx
+    mov%2    [r1+r2*2], m0
+    mov%2    [r1+r2*2+mmsize], m2
+%else
+    paddsw          m0, m2, [r0+r2*2+mmsize*0]
+    paddsw          m1, m2, [r0+r2*2+mmsize*1]
+    psraw           m0, 15 - %1
+    psraw           m1, 15 - %1
+    pmaxsw          m0, m4
+    pmaxsw          m1, m4
+    pminsw          m0, m3
+    pminsw          m1, m3
+    mov%2    [r1+r2*2], m0
+    mov%2    [r1+r2*2+mmsize], m1
+%endif
+    add             r2, mmsize
+    jl .loop_%2
+%endmacro
+
+%macro yuv2plane1_fn 3
+cglobal yuv2plane1_%1, %3, %3, %2
+    add             r2, mmsize - 1
+    and             r2, ~(mmsize - 1)
+%if %1 == 8
+    add             r1, r2
+%else ; %1 != 8
+    lea             r1, [r1+r2*2]
+%endif ; %1 == 8
+%if %1 == 16
+    lea             r0, [r0+r2*4]
+%else ; %1 != 16
+    lea             r0, [r0+r2*2]
+%endif ; %1 == 16
+    neg             r2
+
+%if %1 == 8
+    pxor            m4, m4               ; zero
+
+    ; create registers holding dither
+    movq            m3, [r3]             ; dither
+    test           r4d, r4d
+    jz              .no_rot
+%if mmsize == 16
+    punpcklqdq      m3, m3
+%endif ; mmsize == 16
+    PALIGNR_MMX     m3, m3, 3, m2
+.no_rot:
+%if mmsize == 8
+    mova            m2, m3
+    punpckhbw       m3, m4               ; byte->word
+    punpcklbw       m2, m4               ; byte->word
+%else
+    punpcklbw       m3, m4
+    mova            m2, m3
+%endif
+%elif %1 == 9
+    pxor            m4, m4
+    mova            m3, [pw_512]
+    mova            m2, [pw_32]
+%elif %1 == 10
+    pxor            m4, m4
+    mova            m3, [pw_1024]
+    mova            m2, [pw_16]
+%else ; %1 == 16
+%if cpuflag(sse4) ; sse4/avx
+    mova            m4, [pd_4]
+%else ; mmx/sse2
+    mova            m4, [pd_4min0x40000]
+    mova            m5, [minshort]
+%endif ; mmx/sse2/sse4/avx
+%endif ; %1 == ..
+
+    ; actual pixel scaling
+%if mmsize == 8
+    yuv2plane1_mainloop %1, a
+%else ; mmsize == 16
+    test            r1, 15
+    jnz .unaligned
+    yuv2plane1_mainloop %1, a
+    REP_RET
+.unaligned:
+    yuv2plane1_mainloop %1, u
+%endif ; mmsize == 8/16
+    REP_RET
+%endmacro
+
+%ifdef ARCH_X86_32
+INIT_MMX mmx
+yuv2plane1_fn  8, 0, 5
+yuv2plane1_fn 16, 0, 3
+
+INIT_MMX mmx2
+yuv2plane1_fn  9, 0, 3
+yuv2plane1_fn 10, 0, 3
+%endif
+
+INIT_XMM sse2
+yuv2plane1_fn  8, 5, 5
+yuv2plane1_fn  9, 5, 3
+yuv2plane1_fn 10, 5, 3
+yuv2plane1_fn 16, 6, 3
+
+INIT_XMM sse4
+yuv2plane1_fn 16, 5, 3
+
+INIT_XMM avx
+yuv2plane1_fn  8, 5, 5
+yuv2plane1_fn  9, 5, 3
+yuv2plane1_fn 10, 5, 3
+yuv2plane1_fn 16, 5, 3
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@ -1,7 +1,6 @@
 ;******************************************************************************
-;* x86-optimized horizontal/vertical line scaling functions
+;* x86-optimized horizontal line scaling functions
 ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
-;*                    Kieran Kunhya <kieran@kunhya.com>
 ;*
 ;* This file is part of Libav.
 ;*
@ -29,17 +28,6 @@ max_19bit_int: times 4 dd 0x7ffff
 max_19bit_flt: times 4 dd 524287.0
 minshort:      times 8 dw 0x8000
 unicoeff:      times 4 dd 0x20000000
-yuv2yuvX_16_start:  times 4 dd 0x4000 - 0x40000000
-yuv2yuvX_10_start:  times 4 dd 0x10000
-yuv2yuvX_9_start:   times 4 dd 0x20000
-yuv2yuvX_10_upper:  times 8 dw 0x3ff
-yuv2yuvX_9_upper:   times 8 dw 0x1ff
-pd_4:          times 4 dd 4
-pd_4min0x40000:times 4 dd 4 - (0x40000)
-pw_16:         times 8 dw 16
-pw_32:         times 8 dw 32
-pw_512:        times 8 dw 512
-pw_1024:       times 8 dw 1024

 SECTION .text

@ -441,371 +429,3 @@ INIT_XMM
 SCALE_FUNCS2 sse2,  6, 7, 8
 SCALE_FUNCS2 ssse3, 6, 6, 8
 SCALE_FUNCS2 sse4,  6, 6, 8
-
-;-----------------------------------------------------------------------------
-; vertical line scaling
-;
-; void yuv2plane1_<output_size>_<opt>(const int16_t *src, uint8_t *dst, int dstW,
-;                                     const uint8_t *dither, int offset)
-; and
-; void yuv2planeX_<output_size>_<opt>(const int16_t *filter, int filterSize,
-;                                     const int16_t **src, uint8_t *dst, int dstW,
-;                                     const uint8_t *dither, int offset)
-;
-; Scale one or $filterSize lines of source data to generate one line of output
-; data. The input is 15-bit in int16_t if $output_size is [8,10] and 19-bit in
-; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
-; of 2. $offset is either 0 or 3. $dither holds 8 values.
-;-----------------------------------------------------------------------------
-
-%macro yuv2planeX_fn 4
-
-%ifdef ARCH_X86_32
-%define cntr_reg r1
-%define movsx mov
-%else
-%define cntr_reg r11
-%define movsx movsxd
-%endif
-
-cglobal yuv2planeX_%2_%1, %4, 7, %3
-%if %2 == 8 || %2 == 9 || %2 == 10
-    pxor            m6,  m6
-%endif ; %2 == 8/9/10
-
-%if %2 == 8
-%ifdef ARCH_X86_32
-%assign pad 0x2c - (stack_offset & 15)
-    SUB             rsp, pad
-%define m_dith m7
-%else ; x86-64
-%define m_dith m9
-%endif ; x86-32
-
-    ; create registers holding dither
-    movq        m_dith, [r5]             ; dither
-    test            r6d, r6d
-    jz              .no_rot
-%if mmsize == 16
-    punpcklqdq  m_dith,  m_dith
-%endif ; mmsize == 16
-    PALIGNR     m_dith,  m_dith,  3,  m0
-.no_rot:
-%if mmsize == 16
-    punpcklbw   m_dith,  m6
-%ifdef ARCH_X86_64
-    punpcklwd       m8,  m_dith,  m6
-    pslld           m8,  12
-%else ; x86-32
-    punpcklwd       m5,  m_dith,  m6
-    pslld           m5,  12
-%endif ; x86-32/64
-    punpckhwd   m_dith,  m6
-    pslld       m_dith,  12
-%ifdef ARCH_X86_32
-    mova      [rsp+ 0],  m5
-    mova      [rsp+16],  m_dith
-%endif
-%else ; mmsize == 8
-    punpcklbw       m5,  m_dith,  m6
-    punpckhbw   m_dith,  m6
-    punpcklwd       m4,  m5,  m6
-    punpckhwd       m5,  m6
-    punpcklwd       m3,  m_dith,  m6
-    punpckhwd   m_dith,  m6
-    pslld           m4,  12
-    pslld           m5,  12
-    pslld           m3,  12
-    pslld       m_dith,  12
-    mova      [rsp+ 0],  m4
-    mova      [rsp+ 8],  m5
-    mova      [rsp+16],  m3
-    mova      [rsp+24],  m_dith
-%endif ; mmsize == 8/16
-%endif ; %2 == 8
-
-    xor             r5,  r5
-
-.pixelloop:
-%assign %%i 0
-    ; the rep here is for the 8bit output mmx case, where dither covers
-    ; 8 pixels but we can only handle 2 pixels per register, and thus 4
-    ; pixels per iteration. In order to not have to keep track of where
-    ; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
-%if %2 == 8
-%rep 16/mmsize
-%endif ; %2 == 8
-
-%if %2 == 8
-%ifdef ARCH_X86_32
-    mova            m2, [rsp+mmsize*(0+%%i)]
-    mova            m1, [rsp+mmsize*(1+%%i)]
-%else ; x86-64
-    mova            m2,  m8
-    mova            m1,  m_dith
-%endif ; x86-32/64
-%else ; %2 == 9/10/16
-    mova            m1, [yuv2yuvX_%2_start]
-    mova            m2,  m1
-%endif ; %2 == 8/9/10/16
-    movsx     cntr_reg,  r1m
-.filterloop_ %+ %%i:
-    ; input pixels
-    mov             r6, [r2+gprsize*cntr_reg-2*gprsize]
-%if %2 == 16
-    mova            m3, [r6+r5*4]
-    mova            m5, [r6+r5*4+mmsize]
-%else ; %2 == 8/9/10
-    mova            m3, [r6+r5*2]
-%endif ; %2 == 8/9/10/16
-    mov             r6, [r2+gprsize*cntr_reg-gprsize]
-%if %2 == 16
-    mova            m4, [r6+r5*4]
-    mova            m6, [r6+r5*4+mmsize]
-%else ; %2 == 8/9/10
-    mova            m4, [r6+r5*2]
-%endif ; %2 == 8/9/10/16
-
-    ; coefficients
-    movd            m0, [r0+2*cntr_reg-4]; coeff[0], coeff[1]
-%if %2 == 16
-    pshuflw         m7,  m0,  0          ; coeff[0]
-    pshuflw         m0,  m0,  0x55       ; coeff[1]
-    pmovsxwd        m7,  m7              ; word -> dword
-    pmovsxwd        m0,  m0              ; word -> dword
-
-    pmulld          m3,  m7
-    pmulld          m5,  m7
-    pmulld          m4,  m0
-    pmulld          m6,  m0
-
-    paddd           m2,  m3
-    paddd           m1,  m5
-    paddd           m2,  m4
-    paddd           m1,  m6
-%else ; %2 == 10/9/8
-    punpcklwd       m5,  m3,  m4
-    punpckhwd       m3,  m4
-    SPLATD          m0,  m0
-
-    pmaddwd         m5,  m0
-    pmaddwd         m3,  m0
-
-    paddd           m2,  m5
-    paddd           m1,  m3
-%endif ; %2 == 8/9/10/16
-
-    sub       cntr_reg,  2
-    jg .filterloop_ %+ %%i
-
-%if %2 == 16
-    psrad           m2,  31 - %2
-    psrad           m1,  31 - %2
-%else ; %2 == 10/9/8
-    psrad           m2,  27 - %2
-    psrad           m1,  27 - %2
-%endif ; %2 == 8/9/10/16
-
-%if %2 == 8
-    packssdw        m2,  m1
-    packuswb        m2,  m2
-    movh     [r3+r5*1],  m2
-%else ; %2 == 9/10/16
-%if %2 == 16
-    packssdw        m2,  m1
-    paddw           m2, [minshort]
-%else ; %2 == 9/10
-%ifidn %1, sse4
-    packusdw        m2,  m1
-%elifidn %1, avx
-    packusdw        m2,  m1
-%else ; mmx2/sse2
-    packssdw        m2,  m1
-    pmaxsw          m2,  m6
-%endif ; mmx2/sse2/sse4/avx
-    pminsw          m2, [yuv2yuvX_%2_upper]
-%endif ; %2 == 9/10/16
-    mova     [r3+r5*2],  m2
-%endif ; %2 == 8/9/10/16
-
-    add             r5,  mmsize/2
-    sub             r4d, mmsize/2
-%if %2 == 8
-%assign %%i %%i+2
-%endrep
-%endif ; %2 == 8
-    jg .pixelloop
-
-%if %2 == 8
-%ifdef ARCH_X86_32
-    ADD             rsp, pad
-    RET
-%else ; x86-64
-    REP_RET
-%endif ; x86-32/64
-%else ; %2 == 9/10/16
-    REP_RET
-%endif ; %2 == 8/9/10/16
-%endmacro
-
-%define PALIGNR PALIGNR_MMX
-%ifdef ARCH_X86_32
-INIT_MMX
-yuv2planeX_fn mmx2,  8,  0, 7
-yuv2planeX_fn mmx2,  9,  0, 5
-yuv2planeX_fn mmx2, 10,  0, 5
-%endif
-
-INIT_XMM
-yuv2planeX_fn sse2,  8, 10, 7
-yuv2planeX_fn sse2,  9,  7, 5
-yuv2planeX_fn sse2, 10,  7, 5
-
-%define PALIGNR PALIGNR_SSSE3
-yuv2planeX_fn sse4,  8, 10, 7
-yuv2planeX_fn sse4,  9,  7, 5
-yuv2planeX_fn sse4, 10,  7, 5
-yuv2planeX_fn sse4, 16,  8, 5
-
-INIT_AVX
-yuv2planeX_fn avx,   8, 10, 7
-yuv2planeX_fn avx,   9,  7, 5
-yuv2planeX_fn avx,  10,  7, 5
-
-; %1=output-bpc, %2=alignment (u/a)
-%macro yuv2plane1_mainloop 2
-.loop_%2:
-%if %1 == 8
-    paddsw          m0, m2, [r0+r2*2+mmsize*0]
-    paddsw          m1, m3, [r0+r2*2+mmsize*1]
-    psraw           m0, 7
-    psraw           m1, 7
-    packuswb        m0, m1
-    mov%2      [r1+r2], m0
-%elif %1 == 16
-    paddd           m0, m4, [r0+r2*4+mmsize*0]
-    paddd           m1, m4, [r0+r2*4+mmsize*1]
-    paddd           m2, m4, [r0+r2*4+mmsize*2]
-    paddd           m3, m4, [r0+r2*4+mmsize*3]
-    psrad           m0, 3
-    psrad           m1, 3
-    psrad           m2, 3
-    psrad           m3, 3
-%if cpuflag(sse4) ; avx/sse4
-    packusdw        m0, m1
-    packusdw        m2, m3
-%else ; mmx/sse2
-    packssdw        m0, m1
-    packssdw        m2, m3
-    paddw           m0, m5
-    paddw           m2, m5
-%endif ; mmx/sse2/sse4/avx
-    mov%2    [r1+r2*2], m0
-    mov%2    [r1+r2*2+mmsize], m2
-%else
-    paddsw          m0, m2, [r0+r2*2+mmsize*0]
-    paddsw          m1, m2, [r0+r2*2+mmsize*1]
-    psraw           m0, 15 - %1
-    psraw           m1, 15 - %1
-    pmaxsw          m0, m4
-    pmaxsw          m1, m4
-    pminsw          m0, m3
-    pminsw          m1, m3
-    mov%2    [r1+r2*2], m0
-    mov%2    [r1+r2*2+mmsize], m1
-%endif
-    add             r2, mmsize
-    jl .loop_%2
-%endmacro
-
-%macro yuv2plane1_fn 3
-cglobal yuv2plane1_%1, %3, %3, %2
-    add             r2, mmsize - 1
-    and             r2, ~(mmsize - 1)
-%if %1 == 8
-    add             r1, r2
-%else ; %1 != 8
-    lea             r1, [r1+r2*2]
-%endif ; %1 == 8
-%if %1 == 16
-    lea             r0, [r0+r2*4]
-%else ; %1 != 16
-    lea             r0, [r0+r2*2]
-%endif ; %1 == 16
-    neg             r2
-
-%if %1 == 8
-    pxor            m4, m4               ; zero
-
-    ; create registers holding dither
-    movq            m3, [r3]             ; dither
-    test           r4d, r4d
-    jz              .no_rot
-%if mmsize == 16
-    punpcklqdq      m3, m3
-%endif ; mmsize == 16
-    PALIGNR_MMX     m3, m3, 3, m2
-.no_rot:
-%if mmsize == 8
-    mova            m2, m3
-    punpckhbw       m3, m4               ; byte->word
-    punpcklbw       m2, m4               ; byte->word
-%else
-    punpcklbw       m3, m4
-    mova            m2, m3
-%endif
-%elif %1 == 9
-    pxor            m4, m4
-    mova            m3, [pw_512]
-    mova            m2, [pw_32]
-%elif %1 == 10
-    pxor            m4, m4
-    mova            m3, [pw_1024]
-    mova            m2, [pw_16]
-%else ; %1 == 16
-%if cpuflag(sse4) ; sse4/avx
-    mova            m4, [pd_4]
-%else ; mmx/sse2
-    mova            m4, [pd_4min0x40000]
-    mova            m5, [minshort]
-%endif ; mmx/sse2/sse4/avx
-%endif ; %1 == ..
-
-    ; actual pixel scaling
-%if mmsize == 8
-    yuv2plane1_mainloop %1, a
-%else ; mmsize == 16
-    test            r1, 15
-    jnz .unaligned
-    yuv2plane1_mainloop %1, a
-    REP_RET
-.unaligned:
-    yuv2plane1_mainloop %1, u
-%endif ; mmsize == 8/16
-    REP_RET
-%endmacro
-
-%ifdef ARCH_X86_32
-INIT_MMX mmx
-yuv2plane1_fn  8, 0, 5
-yuv2plane1_fn 16, 0, 3
-
-INIT_MMX mmx2
-yuv2plane1_fn  9, 0, 3
-yuv2plane1_fn 10, 0, 3
-%endif
-
-INIT_XMM sse2
-yuv2plane1_fn  8, 5, 5
-yuv2plane1_fn  9, 5, 3
-yuv2plane1_fn 10, 5, 3
-yuv2plane1_fn 16, 6, 3
-
-INIT_XMM sse4
-yuv2plane1_fn 16, 5, 3
-
-INIT_XMM avx
-yuv2plane1_fn  8, 5, 5
-yuv2plane1_fn  9, 5, 3
-yuv2plane1_fn 10, 5, 3
-yuv2plane1_fn 16, 5, 3
--- a/tests/codec-regression.sh
+++ b/tests/codec-regression.sh
@ -369,6 +369,11 @@ do_audio_encoding g726.wav "-b:a 32k -ac 1 -ar 8000 -acodec g726"
 do_audio_decoding
 fi

+if [ -n "$do_adpcm_adx" ] ; then
+do_audio_encoding adpcm_adx.adx "-acodec adpcm_adx"
+do_audio_decoding
+fi
+
 if [ -n "$do_adpcm_ima_wav" ] ; then
 do_audio_encoding adpcm_ima.wav "-acodec adpcm_ima_wav"
 do_audio_decoding
--- a/tests/ref/acodec/adpcm_adx
+++ b/tests/ref/acodec/adpcm_adx
@ -0,0 +1,4 @@
+0a30509d9296b857e134b762b76dbc31 *./tests/data/acodec/adpcm_adx.adx
+297720 ./tests/data/acodec/adpcm_adx.adx
+2dbc601ed5259f4d74dc48ccd8da7eaf *./tests/data/adpcm_adx.acodec.out.wav
+stddev: 6989.46 PSNR: 19.44 MAXDIFF:65398 bytes:  1058432/  1058400