Merge remote-tracking branch 'qatar/master'

* qatar/master:
  sgidec: Use bytestream2 functions to prevent buffer overreads.
  cosmetics: Move static and inline attributes to more standard places.
  configure: provide libavfilter/version.h header to get_version()
  swscale: change yuv2yuvX code to use cpuflag().
  libx264: Don't leave max_b_frames as -1 if the user didn't set it
  FATE: convert output to rgba for the targa tests which currently output pal8
  fate: add missing reference files for targa tests in 9c2f9b0e2
  FATE: enable the 2 remaining targa conformance suite tests
  targa: add support for rgb555 palette
  FATE: fix targa tests on big-endian systems

Conflicts:
	libavcodec/sgidec.c
	libavcodec/targa.c
	libswscale/x86/output.asm
	tests/fate/image.mak

Merged-by: Michael Niedermayer <michaelni@gmx.at>
13 years ago · 4640da7e58
parent a91f206665 4cd0bdae9a
commit 4640da7e58
12 changed files with 231 additions and 146 deletions
--- a/libavcodec/bytestream.h
+++ b/libavcodec/bytestream.h
@@ -75,6 +75,42 @@ DEF  (byte, 1, AV_RB8 , AV_WB8 )
 #undef DEF64
 #undef DEF_T

+#if HAVE_BIGENDIAN
+#   define bytestream2_get_ne16  bytestream2_get_be16
+#   define bytestream2_get_ne24  bytestream2_get_be24
+#   define bytestream2_get_ne32  bytestream2_get_be32
+#   define bytestream2_get_ne64  bytestream2_get_be64
+#   define bytestream2_get_ne16u bytestream2_get_be16u
+#   define bytestream2_get_ne24u bytestream2_get_be24u
+#   define bytestream2_get_ne32u bytestream2_get_be32u
+#   define bytestream2_get_ne64u bytestream2_get_be64u
+#   define bytestream2_put_ne16  bytestream2_put_be16
+#   define bytestream2_put_ne24  bytestream2_put_be24
+#   define bytestream2_put_ne32  bytestream2_put_be32
+#   define bytestream2_put_ne64  bytestream2_put_be64
+#   define bytestream2_peek_ne16 bytestream2_peek_be16
+#   define bytestream2_peek_ne24 bytestream2_peek_be24
+#   define bytestream2_peek_ne32 bytestream2_peek_be32
+#   define bytestream2_peek_ne64 bytestream2_peek_be64
+#else
+#   define bytestream2_get_ne16  bytestream2_get_le16
+#   define bytestream2_get_ne24  bytestream2_get_le24
+#   define bytestream2_get_ne32  bytestream2_get_le32
+#   define bytestream2_get_ne64  bytestream2_get_le64
+#   define bytestream2_get_ne16u bytestream2_get_le16u
+#   define bytestream2_get_ne24u bytestream2_get_le24u
+#   define bytestream2_get_ne32u bytestream2_get_le32u
+#   define bytestream2_get_ne64u bytestream2_get_le64u
+#   define bytestream2_put_ne16  bytestream2_put_le16
+#   define bytestream2_put_ne24  bytestream2_put_le24
+#   define bytestream2_put_ne32  bytestream2_put_le32
+#   define bytestream2_put_ne64  bytestream2_put_le64
+#   define bytestream2_peek_ne16 bytestream2_peek_le16
+#   define bytestream2_peek_ne24 bytestream2_peek_le24
+#   define bytestream2_peek_ne32 bytestream2_peek_le32
+#   define bytestream2_peek_ne64 bytestream2_peek_le64
+#endif
+
 static av_always_inline void bytestream2_init(GetByteContext *g,
                                              const uint8_t *buf, int buf_size)
 {
--- a/libavcodec/g722.c
+++ b/libavcodec/g722.c
@@ -129,7 +129,7 @@ static void do_adaptive_prediction(struct G722Band *band, const int cur_diff)
    band->prev_qtzd_reconst = cur_qtzd_reconst;
 }

-static int inline linear_scale_factor(const int log_factor)
+static inline int linear_scale_factor(const int log_factor)
 {
    const int wd1 = inv_log2_table[(log_factor >> 6) & 31];
    const int shift = log_factor >> 11;
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -101,7 +101,11 @@ static const uint8_t tc0_table[52*3][4] = {
 };

 /* intra: 0 if this loopfilter call is guaranteed to be inter (bS < 4), 1 if it might be intra (bS == 4) */
-static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) {
+static av_always_inline void filter_mb_edgev(uint8_t *pix, int stride,
+                                             const int16_t bS[4],
+                                             unsigned int qp, int a, int b,
+                                             H264Context *h, int intra)
+{
    const unsigned int index_a = qp + a;
    const int alpha = alpha_table[index_a];
    const int beta  = beta_table[qp + b];
@@ -118,7 +122,12 @@ static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, const in
        h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
    }
 }
-static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) {
+
+static av_always_inline void filter_mb_edgecv(uint8_t *pix, int stride,
+                                              const int16_t bS[4],
+                                              unsigned int qp, int a, int b,
+                                              H264Context *h, int intra)
+{
    const unsigned int index_a = qp + a;
    const int alpha = alpha_table[index_a];
    const int beta  = beta_table[qp + b];
@@ -136,7 +145,12 @@ static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, const i
    }
 }

-static void av_always_inline filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, const int16_t bS[7], int bsi, int qp, int a, int b, int intra ) {
+static av_always_inline void filter_mb_mbaff_edgev(H264Context *h, uint8_t *pix,
+                                                   int stride,
+                                                   const int16_t bS[7], int bsi,
+                                                   int qp, int a, int b,
+                                                   int intra)
+{
    const unsigned int index_a = qp + a;
    const int alpha = alpha_table[index_a];
    const int beta  = beta_table[qp + b];
@@ -153,7 +167,13 @@ static void av_always_inline filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix
        h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
    }
 }
-static void av_always_inline filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, const int16_t bS[7], int bsi, int qp, int a, int b, int intra ) {
+
+static av_always_inline void filter_mb_mbaff_edgecv(H264Context *h,
+                                                    uint8_t *pix, int stride,
+                                                    const int16_t bS[7],
+                                                    int bsi, int qp, int a,
+                                                    int b, int intra)
+{
    const unsigned int index_a = qp + a;
    const int alpha = alpha_table[index_a];
    const int beta  = beta_table[qp + b];
@@ -171,7 +191,11 @@ static void av_always_inline filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pi
    }
 }

-static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) {
+static av_always_inline void filter_mb_edgeh(uint8_t *pix, int stride,
+                                             const int16_t bS[4],
+                                             unsigned int qp, int a, int b,
+                                             H264Context *h, int intra)
+{
    const unsigned int index_a = qp + a;
    const int alpha = alpha_table[index_a];
    const int beta  = beta_table[qp + b];
@@ -189,7 +213,11 @@ static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, const in
    }
 }

-static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, const int16_t bS[4], unsigned int qp, int a, int b, H264Context *h, int intra ) {
+static av_always_inline void filter_mb_edgech(uint8_t *pix, int stride,
+                                              const int16_t bS[4],
+                                              unsigned int qp, int a, int b,
+                                              H264Context *h, int intra)
+{
    const unsigned int index_a = qp + a;
    const int alpha = alpha_table[index_a];
    const int beta  = beta_table[qp + b];
@@ -207,8 +235,15 @@ static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, const i
    }
 }

-static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr,
-                                                           unsigned int linesize, unsigned int uvlinesize, int pixel_shift) {
+static av_always_inline void h264_filter_mb_fast_internal(H264Context *h,
+                                                          int mb_x, int mb_y,
+                                                          uint8_t *img_y,
+                                                          uint8_t *img_cb,
+                                                          uint8_t *img_cr,
+                                                          unsigned int linesize,
+                                                          unsigned int uvlinesize,
+                                                          int pixel_shift)
+{
    MpegEncContext * const s = &h->s;
    int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
    int chroma444 = CHROMA444;
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -532,6 +532,9 @@ static av_cold int X264_init(AVCodecContext *avctx)
    // update AVCodecContext with x264 parameters
    avctx->has_b_frames = x4->params.i_bframe ?
        x4->params.i_bframe_pyramid ? 2 : 1 : 0;
+    if (avctx->max_b_frames < 0)
+        avctx->max_b_frames = 0;
+
    avctx->bit_rate = x4->params.rc.i_bitrate*1000;
 #if FF_API_X264_GLOBAL_OPTS
    avctx->crf = x4->params.rc.f_rf_constant;
--- a/libavcodec/sgidec.c
+++ b/libavcodec/sgidec.c
@@ -32,26 +32,27 @@ typedef struct SgiState {
    unsigned int depth;
    unsigned int bytes_per_channel;
    int linesize;
+    GetByteContext g;
 } SgiState;

 /**
 * Expand an RLE row into a channel.
- * @param in_buf input buffer
- * @param in_end end of input buffer
+ * @param s the current image state
 * @param out_buf Points to one line after the output buffer.
 * @param out_end end of line in output buffer
 * @param pixelstride pixel stride of input buffer
 * @return size of output in bytes, -1 if buffer overflows
 */
-static int expand_rle_row(const uint8_t *in_buf, const uint8_t* in_end,
-            unsigned char *out_buf, uint8_t* out_end, int pixelstride)
+static int expand_rle_row(SgiState *s, uint8_t *out_buf,
+                          uint8_t *out_end, int pixelstride)
 {
    unsigned char pixel, count;
    unsigned char *orig = out_buf;

    while (1) {
-        if(in_buf + 1 > in_end) return -1;
-        pixel = bytestream_get_byte(&in_buf);
+        if (bytestream2_get_bytes_left(&s->g) < 1)
+            return AVERROR_INVALIDDATA;
+        pixel = bytestream2_get_byteu(&s->g);
        if (!(count = (pixel & 0x7f))) {
            return (out_buf - orig) / pixelstride;
        }
@@ -61,11 +62,11 @@ static int expand_rle_row(const uint8_t *in_buf, const uint8_t* in_end,

        if (pixel & 0x80) {
            while (count--) {
-                *out_buf = bytestream_get_byte(&in_buf);
+                *out_buf = bytestream2_get_byte(&s->g);
                out_buf += pixelstride;
            }
        } else {
-            pixel = bytestream_get_byte(&in_buf);
+            pixel = bytestream2_get_byte(&s->g);

            while (count--) {
                *out_buf = pixel;
@@ -78,85 +79,73 @@ static int expand_rle_row(const uint8_t *in_buf, const uint8_t* in_end,
 /**
 * Read a run length encoded SGI image.
 * @param out_buf output buffer
- * @param in_buf input buffer
- * @param in_end end of input buffer
 * @param s the current image state
 * @return 0 if no error, else return error number.
 */
-static int read_rle_sgi(unsigned char* out_buf, const uint8_t *in_buf,
-                        const uint8_t *in_end, SgiState* s)
+static int read_rle_sgi(uint8_t *out_buf, SgiState *s)
 {
    uint8_t *dest_row;
    unsigned int len = s->height * s->depth * 4;
-    const uint8_t *start_table = in_buf;
+    GetByteContext g_table = s->g;
    unsigned int y, z;
    unsigned int start_offset;

    /* size of  RLE offset and length tables */
-    if(len * 2  > in_end - in_buf) {
+    if (len * 2  > bytestream2_get_bytes_left(&s->g)) {
        return AVERROR_INVALIDDATA;
    }

-    in_buf -= SGI_HEADER_SIZE;
    for (z = 0; z < s->depth; z++) {
        dest_row = out_buf;
        for (y = 0; y < s->height; y++) {
            dest_row -= s->linesize;
-            start_offset = bytestream_get_be32(&start_table);
-            if(start_offset > in_end - in_buf) {
+            start_offset = bytestream2_get_be32(&g_table);
+            bytestream2_seek(&s->g, start_offset, SEEK_SET);
+            if (expand_rle_row(s, dest_row + z, dest_row + FFABS(s->linesize),
+                               s->depth) != s->width) {
                return AVERROR_INVALIDDATA;
            }
-            if (expand_rle_row(in_buf + start_offset, in_end, dest_row + z,
-                dest_row + FFABS(s->linesize), s->depth) != s->width)
-                return AVERROR_INVALIDDATA;
        }
    }
    return 0;
 }

-static av_always_inline void copy_loop(uint8_t *out_buf, const uint8_t *in_buf,
-                                       unsigned offset, unsigned bytes_per_channel,
-                                       SgiState *s)
-{
-    int x, y, z;
-    for (y = s->height - 1; y >= 0; y--) {
-        uint8_t *line = out_buf + (y * s->linesize);
-        for (x = s->width; x > 0; x--) {
-            const uint8_t *ptr = in_buf;
-            in_buf += bytes_per_channel;
-            for(z = 0; z < s->depth; z ++) {
-                memcpy(line, ptr, bytes_per_channel);
-                line += bytes_per_channel;
-                ptr += offset;
-            }
-        }
-    }
-}
-
 /**
 * Read an uncompressed SGI image.
 * @param out_buf output buffer
 * @param out_end end ofoutput buffer
- * @param in_buf input buffer
- * @param in_end end of input buffer
 * @param s the current image state
 * @return 0 if read success, otherwise return -1.
 */
 static int read_uncompressed_sgi(unsigned char* out_buf, uint8_t* out_end,
-                const uint8_t *in_buf, const uint8_t *in_end, SgiState* s)
+                                 SgiState *s)
 {
+    int x, y, z;
    unsigned int offset = s->height * s->width * s->bytes_per_channel;
+    GetByteContext gp[4];

    /* Test buffer size. */
-    if (offset * s->depth > in_end - in_buf) {
-       return -1;
+    if (offset * s->depth > bytestream2_get_bytes_left(&s->g))
+        return AVERROR_INVALIDDATA;
+
+    /* Create a reader for each plane */
+    for (z = 0; z < s->depth; z++) {
+        gp[z] = s->g;
+        bytestream2_skip(&gp[z], z * offset);
    }

-    if (s->bytes_per_channel == 2) {
-        copy_loop(out_buf, in_buf, offset, 2, s);
-    } else {
-        av_assert1(s->bytes_per_channel == 1);
-        copy_loop(out_buf, in_buf, offset, 1, s);
+    for (y = s->height - 1; y >= 0; y--) {
+        out_end = out_buf + (y * s->linesize);
+        if (s->bytes_per_channel == 1) {
+            for (x = s->width; x > 0; x--)
+                for (z = 0; z < s->depth; z++)
+                    *out_end++ = bytestream2_get_byteu(&gp[z]);
+        } else {
+            uint16_t *out16 = (uint16_t *)out_end;
+            for (x = s->width; x > 0; x--)
+                for (z = 0; z < s->depth; z++)
+                    *out16++ = bytestream2_get_ne16u(&gp[z]);
+        }
    }
    return 0;
 }
@@ -165,33 +154,31 @@ static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        AVPacket *avpkt)
 {
-    const uint8_t *in_buf = avpkt->data;
-    int buf_size = avpkt->size;
    SgiState *s = avctx->priv_data;
    AVFrame *picture = data;
    AVFrame *p = &s->picture;
-    const uint8_t *in_end = in_buf + buf_size;
    unsigned int dimension, rle;
    int ret = 0;
    uint8_t *out_buf, *out_end;

-    if (buf_size < SGI_HEADER_SIZE){
-        av_log(avctx, AV_LOG_ERROR, "buf_size too small (%d)\n", buf_size);
-        return -1;
+    bytestream2_init(&s->g, avpkt->data, avpkt->size);
+    if (bytestream2_get_bytes_left(&s->g) < SGI_HEADER_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "buf_size too small (%d)\n", avpkt->size);
+        return AVERROR_INVALIDDATA;
    }

    /* Test for SGI magic. */
-    if (bytestream_get_be16(&in_buf) != SGI_MAGIC) {
+    if (bytestream2_get_be16(&s->g) != SGI_MAGIC) {
        av_log(avctx, AV_LOG_ERROR, "bad magic number\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
    }

-    rle = bytestream_get_byte(&in_buf);
-    s->bytes_per_channel = bytestream_get_byte(&in_buf);
-    dimension = bytestream_get_be16(&in_buf);
-    s->width  = bytestream_get_be16(&in_buf);
-    s->height = bytestream_get_be16(&in_buf);
-    s->depth  = bytestream_get_be16(&in_buf);
+    rle                  = bytestream2_get_byte(&s->g);
+    s->bytes_per_channel = bytestream2_get_byte(&s->g);
+    dimension            = bytestream2_get_be16(&s->g);
+    s->width             = bytestream2_get_be16(&s->g);
+    s->height            = bytestream2_get_be16(&s->g);
+    s->depth             = bytestream2_get_be16(&s->g);

    if (s->bytes_per_channel != 1 && (s->bytes_per_channel != 2 || rle)) {
        av_log(avctx, AV_LOG_ERROR, "wrong channel number\n");
@@ -237,19 +224,19 @@ static int decode_frame(AVCodecContext *avctx,
    s->linesize = p->linesize[0];

    /* Skip header. */
-    in_buf += SGI_HEADER_SIZE - 12;
+    bytestream2_seek(&s->g, SGI_HEADER_SIZE, SEEK_SET);
    if (rle) {
-        ret = read_rle_sgi(out_end, in_buf, in_end, s);
+        ret = read_rle_sgi(out_end, s);
    } else {
-        ret = read_uncompressed_sgi(out_buf, out_end, in_buf, in_end, s);
+        ret = read_uncompressed_sgi(out_buf, out_end, s);
    }

    if (ret == 0) {
        *picture   = s->picture;
        *data_size = sizeof(AVPicture);
-        return buf_size;
+        return avpkt->size;
    } else {
-        return -1;
+        return ret;
    }
 }

--- a/libavcodec/targa.c
+++ b/libavcodec/targa.c
@@ -178,24 +178,45 @@ static int decode_frame(AVCodecContext *avctx,
    }

    if(colors){
-        size_t pal_size;
+        int pal_size, pal_sample_size;
        if((colors + first_clr) > 256){
            av_log(avctx, AV_LOG_ERROR, "Incorrect palette: %i colors with offset %i\n", colors, first_clr);
            return -1;
        }
-        if(csize != 24){
+        switch (csize) {
+        case 24: pal_sample_size = 3; break;
+        case 16:
+        case 15: pal_sample_size = 2; break;
+        default:
            av_log(avctx, AV_LOG_ERROR, "Palette entry size %i bits is not supported\n", csize);
            return -1;
        }
-        pal_size = colors * ((csize + 1) >> 3);
+        pal_size = colors * pal_sample_size;
        CHECK_BUFFER_SIZE(buf, buf_end, pal_size, "color table");
        if(avctx->pix_fmt != PIX_FMT_PAL8)//should not occur but skip palette anyway
            buf += pal_size;
        else{
            int t;
-            int32_t *pal = ((int32_t*)p->data[1]) + first_clr;
-            for(t = 0; t < colors; t++){
-                *pal++ = (0xff<<24) | bytestream_get_le24(&buf);
+            uint32_t *pal = ((uint32_t *)p->data[1]) + first_clr;
+
+            switch (pal_sample_size) {
+            case 3:
+                /* RGB24 */
+                for (t = 0; t < colors; t++)
+                    *pal++ = (0xffU<<24) | bytestream_get_le24(&buf);
+                break;
+            case 2:
+                /* RGB555 */
+                for (t = 0; t < colors; t++) {
+                    uint32_t v = bytestream_get_le16(&buf);
+                    v = ((v & 0x7C00) <<  9) |
+                        ((v & 0x03E0) <<  6) |
+                        ((v & 0x001F) <<  3);
+                    /* left bit replication */
+                    v |= (v & 0xE0E0E0U) >> 5;
+                    *pal++ = (0xffU<<24) | v;
+                }
+                break;
            }
            p->palette_has_changed = 1;
        }
--- a/libavfilter/vf_fade.c
+++ b/libavfilter/vf_fade.c
@@ -149,7 +149,7 @@ static av_cold void uninit(AVFilterContext *ctx)

 static int query_formats(AVFilterContext *ctx)
 {
-    const static enum PixelFormat pix_fmts[] = {
+    static const enum PixelFormat pix_fmts[] = {
        PIX_FMT_YUV444P,  PIX_FMT_YUV422P,  PIX_FMT_YUV420P,
        PIX_FMT_YUV411P,  PIX_FMT_YUV410P,
        PIX_FMT_YUVJ444P, PIX_FMT_YUVJ422P, PIX_FMT_YUVJ420P,
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -72,7 +72,7 @@ typedef struct FormatEntry {
    int is_supported_in, is_supported_out;
 } FormatEntry;

-const static FormatEntry format_entries[PIX_FMT_NB] = {
+static const FormatEntry format_entries[PIX_FMT_NB] = {
    [PIX_FMT_YUV420P]     = { 1 , 1 },
    [PIX_FMT_YUYV422]     = { 1 , 1 },
    [PIX_FMT_RGB24]       = { 1 , 1 },
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -56,7 +56,7 @@ SECTION .text
 ; of 2. $offset is either 0 or 3. $dither holds 8 values.
 ;-----------------------------------------------------------------------------

-%macro yuv2planeX_fn 4
+%macro yuv2planeX_fn 3

 %ifdef ARCH_X86_32
 %define cntr_reg r1
@@ -66,12 +66,12 @@ SECTION .text
 %define movsx movsxd
 %endif

-cglobal yuv2planeX_%2_%1, %4, 7, %3
-%if %2 == 8 || %2 == 9 || %2 == 10
+cglobal yuv2planeX_%1, %3, 7, %2
+%if %1 == 8 || %1 == 9 || %1 == 10
    pxor            m6,  m6
-%endif ; %2 == 8/9/10
+%endif ; %1 == 8/9/10

-%if %2 == 8
+%if %1 == 8
 %ifdef ARCH_X86_32
 %assign pad 0x2c - (stack_offset & 15)
    SUB             rsp, pad
@@ -120,7 +120,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
    mova      [rsp+16],  m3
    mova      [rsp+24],  m_dith
 %endif ; mmsize == 8/16
-%endif ; %2 == 8
+%endif ; %1 == 8

    xor             r5,  r5

@@ -130,11 +130,11 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
    ; 8 pixels but we can only handle 2 pixels per register, and thus 4
    ; pixels per iteration. In order to not have to keep track of where
    ; we are w.r.t. dithering, we unroll the mmx/8bit loop x2.
-%if %2 == 8
+%if %1 == 8
 %rep 16/mmsize
-%endif ; %2 == 8
+%endif ; %1 == 8

-%if %2 == 8
+%if %1 == 8
 %ifdef ARCH_X86_32
    mova            m2, [rsp+mmsize*(0+%%i)]
    mova            m1, [rsp+mmsize*(1+%%i)]
@@ -142,31 +142,31 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
    mova            m2,  m8
    mova            m1,  m_dith
 %endif ; x86-32/64
-%else ; %2 == 9/10/16
-    mova            m1, [yuv2yuvX_%2_start]
+%else ; %1 == 9/10/16
+    mova            m1, [yuv2yuvX_%1_start]
    mova            m2,  m1
-%endif ; %2 == 8/9/10/16
+%endif ; %1 == 8/9/10/16
    movsx     cntr_reg,  r1m
 .filterloop_ %+ %%i:
    ; input pixels
    mov             r6, [r2+gprsize*cntr_reg-2*gprsize]
-%if %2 == 16
+%if %1 == 16
    mova            m3, [r6+r5*4]
    mova            m5, [r6+r5*4+mmsize]
-%else ; %2 == 8/9/10
+%else ; %1 == 8/9/10
    mova            m3, [r6+r5*2]
-%endif ; %2 == 8/9/10/16
+%endif ; %1 == 8/9/10/16
    mov             r6, [r2+gprsize*cntr_reg-gprsize]
-%if %2 == 16
+%if %1 == 16
    mova            m4, [r6+r5*4]
    mova            m6, [r6+r5*4+mmsize]
-%else ; %2 == 8/9/10
+%else ; %1 == 8/9/10
    mova            m4, [r6+r5*2]
-%endif ; %2 == 8/9/10/16
+%endif ; %1 == 8/9/10/16

    ; coefficients
    movd            m0, [r0+2*cntr_reg-4]; coeff[0], coeff[1]
-%if %2 == 16
+%if %1 == 16
    pshuflw         m7,  m0,  0          ; coeff[0]
    pshuflw         m0,  m0,  0x55       ; coeff[1]
    pmovsxwd        m7,  m7              ; word -> dword
@@ -181,7 +181,7 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3
    paddd           m1,  m5
    paddd           m2,  m4
    paddd           m1,  m6
-%else ; %2 == 10/9/8
+%else ; %1 == 10/9/8
    punpcklwd       m5,  m3,  m4
    punpckhwd       m3,  m4
    SPLATD          m0,  m0
@@ -191,85 +191,84 @@ cglobal yuv2planeX_%2_%1, %4, 7, %3

    paddd           m2,  m5
    paddd           m1,  m3
-%endif ; %2 == 8/9/10/16
+%endif ; %1 == 8/9/10/16

    sub       cntr_reg,  2
    jg .filterloop_ %+ %%i

-%if %2 == 16
-    psrad           m2,  31 - %2
-    psrad           m1,  31 - %2
-%else ; %2 == 10/9/8
-    psrad           m2,  27 - %2
-    psrad           m1,  27 - %2
-%endif ; %2 == 8/9/10/16
+%if %1 == 16
+    psrad           m2,  31 - %1
+    psrad           m1,  31 - %1
+%else ; %1 == 10/9/8
+    psrad           m2,  27 - %1
+    psrad           m1,  27 - %1
+%endif ; %1 == 8/9/10/16

-%if %2 == 8
+%if %1 == 8
    packssdw        m2,  m1
    packuswb        m2,  m2
    movh     [r3+r5*1],  m2
-%else ; %2 == 9/10/16
-%if %2 == 16
+%else ; %1 == 9/10/16
+%if %1 == 16
    packssdw        m2,  m1
    paddw           m2, [minshort]
-%else ; %2 == 9/10
-%ifidn %1, sse4
-    packusdw        m2,  m1
-%elifidn %1, avx
+%else ; %1 == 9/10
+%if cpuflag(sse4)
    packusdw        m2,  m1
 %else ; mmx2/sse2
    packssdw        m2,  m1
    pmaxsw          m2,  m6
 %endif ; mmx2/sse2/sse4/avx
-    pminsw          m2, [yuv2yuvX_%2_upper]
-%endif ; %2 == 9/10/16
+    pminsw          m2, [yuv2yuvX_%1_upper]
+%endif ; %1 == 9/10/16
    mova     [r3+r5*2],  m2
-%endif ; %2 == 8/9/10/16
+%endif ; %1 == 8/9/10/16

    add             r5,  mmsize/2
    sub             r4d, mmsize/2
-%if %2 == 8
+%if %1 == 8
 %assign %%i %%i+2
 %endrep
-%endif ; %2 == 8
+%endif ; %1 == 8
    jg .pixelloop

-%if %2 == 8
+%if %1 == 8
 %ifdef ARCH_X86_32
    ADD             rsp, pad
    RET
 %else ; x86-64
    REP_RET
 %endif ; x86-32/64
-%else ; %2 == 9/10/16
+%else ; %1 == 9/10/16
    REP_RET
-%endif ; %2 == 8/9/10/16
+%endif ; %1 == 8/9/10/16
 %endmacro

 %define PALIGNR PALIGNR_MMX
 %ifdef ARCH_X86_32
-INIT_MMX
-yuv2planeX_fn mmx2,  8,  0, 7
-yuv2planeX_fn mmx2,  9,  0, 5
-yuv2planeX_fn mmx2, 10,  0, 5
+INIT_MMX mmx2
+yuv2planeX_fn  8,  0, 7
+yuv2planeX_fn  9,  0, 5
+yuv2planeX_fn 10,  0, 5
 %endif

-INIT_XMM
-yuv2planeX_fn sse2,  8, 10, 7
-yuv2planeX_fn sse2,  9,  7, 5
-yuv2planeX_fn sse2, 10,  7, 5
+INIT_XMM sse2
+yuv2planeX_fn  8, 10, 7
+yuv2planeX_fn  9,  7, 5
+yuv2planeX_fn 10,  7, 5

 %define PALIGNR PALIGNR_SSSE3
-yuv2planeX_fn sse4,  8, 10, 7
-yuv2planeX_fn sse4,  9,  7, 5
-yuv2planeX_fn sse4, 10,  7, 5
-yuv2planeX_fn sse4, 16,  8, 5
+INIT_XMM sse4
+yuv2planeX_fn  8, 10, 7
+yuv2planeX_fn  9,  7, 5
+yuv2planeX_fn 10,  7, 5
+yuv2planeX_fn 16,  8, 5

 %ifdef HAVE_AVX
-INIT_AVX
-yuv2planeX_fn avx,   8, 10, 7
-yuv2planeX_fn avx,   9,  7, 5
-yuv2planeX_fn avx,  10,  7, 5
+INIT_XMM avx
+yuv2planeX_fn  8, 10, 7
+yuv2planeX_fn  9,  7, 5
+yuv2planeX_fn 10,  7, 5
 %endif

 ; %1=outout-bpc, %2=alignment (u/a)
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -34,10 +34,12 @@ FATE_TESTS += $(FATE_IMAGE)
 fate-image: $(FATE_IMAGE)

 FATE_TARGA = CBW8       \
+             CCM8       \
             CTC16      \
             CTC24      \
             CTC32      \
             UBW8       \
+             UCM8       \
             UTC16      \
             UTC24      \
             UTC32
@@ -49,12 +51,12 @@ FATE_TESTS += $(FATE_TARGA)
 fate-targa: $(FATE_TARGA)

 fate-targa-conformance-CBW8:  CMD = framecrc -i $(SAMPLES)/targa-conformance/CBW8.TGA
-# fate-targa-conformance-CCM8:  CMD = framecrc -i $(SAMPLES)/targa-conformance/CCM8.TGA
+fate-targa-conformance-CCM8:  CMD = framecrc -i $(SAMPLES)/targa-conformance/CCM8.TGA  -pix_fmt rgba
 fate-targa-conformance-CTC16: CMD = framecrc -i $(SAMPLES)/targa-conformance/CTC16.TGA -pix_fmt rgb555le
 fate-targa-conformance-CTC24: CMD = framecrc -i $(SAMPLES)/targa-conformance/CTC24.TGA
 fate-targa-conformance-CTC32: CMD = framecrc -i $(SAMPLES)/targa-conformance/CTC32.TGA -pix_fmt bgra
 fate-targa-conformance-UBW8:  CMD = framecrc -i $(SAMPLES)/targa-conformance/UBW8.TGA
-# fate-targa-conformance-UCM8:  CMD = framecrc -i $(SAMPLES)/targa-conformance/UCM8.TGA
+fate-targa-conformance-UCM8:  CMD = framecrc -i $(SAMPLES)/targa-conformance/UCM8.TGA  -pix_fmt rgba
 fate-targa-conformance-UTC16: CMD = framecrc -i $(SAMPLES)/targa-conformance/UTC16.TGA -pix_fmt rgb555le
 fate-targa-conformance-UTC24: CMD = framecrc -i $(SAMPLES)/targa-conformance/UTC24.TGA
 fate-targa-conformance-UTC32: CMD = framecrc -i $(SAMPLES)/targa-conformance/UTC32.TGA -pix_fmt bgra
--- a/tests/ref/fate/targa-conformance-CCM8
+++ b/tests/ref/fate/targa-conformance-CCM8
@@ -0,0 +1 @@
+0, 0, 65536, 0x47e97fe9
--- a/tests/ref/fate/targa-conformance-UCM8
+++ b/tests/ref/fate/targa-conformance-UCM8
@@ -0,0 +1 @@
+0, 0, 65536, 0x47e97fe9