swscale: use 15-bit intermediates for 9/10-bit scaling.

14 years ago · 28c1115a91
parent b2c087871d
commit 28c1115a91
14 changed files with 302 additions and 223 deletions
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@ -406,7 +406,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

-    if (c->scalingBpp == 8) {
+    if (c->srcBpc == 8 && c->dstBpc <= 10) {
        c->hScale       = hScale_altivec_real;
    }
    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@ -211,17 +211,9 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,

 #define output_pixel(pos, val) \
    if (big_endian) { \
-        if (output_bits == 16) { \
-            AV_WB16(pos, av_clip_uint16(val >> shift)); \
-        } else { \
-            AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
-        } \
+        AV_WB16(pos, av_clip_uint16(val >> shift)); \
    } else { \
-        if (output_bits == 16) { \
-            AV_WL16(pos, av_clip_uint16(val >> shift)); \
-        } else { \
-            AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
-        } \
+        AV_WL16(pos, av_clip_uint16(val >> shift)); \
    }
    for (i = 0; i < dstW; i++) {
        int val = 1 << (30-output_bits - 1);
@ -263,7 +255,67 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
 #undef output_pixel
 }

-#define yuv2NBPS(bits, BE_LE, is_be) \
+static av_always_inline void
+yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
+                      int lumFilterSize, const int16_t *chrFilter,
+                      const int16_t **chrUSrc, const int16_t **chrVSrc,
+                      int chrFilterSize, const int16_t **alpSrc,
+                      uint16_t *dest[4], int dstW, int chrDstW,
+                      int big_endian, int output_bits)
+{
+    // Weighted sum of int16_t input lines per output sample. FIXME: optimize (quickly written).
+    int i;
+    uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; // NULL when CONFIG_SWSCALE_ALPHA is 0
+    int shift = 11 + 16 - output_bits - 1; // == 26 - output_bits
+
+#define output_pixel(pos, val) \
+    if (big_endian) { \
+        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+    } else { \
+        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+    }
+    for (i = 0; i < dstW; i++) {
+        int val = 1 << (26-output_bits - 1); // 1 << (shift - 1): rounding term for the >> shift in output_pixel
+        int j;
+
+        for (j = 0; j < lumFilterSize; j++)
+            val += (lumSrc[j][i] * lumFilter[j]) >> 1; // each product is halved before accumulation
+
+        output_pixel(&yDest[i], val); // shift, clip to output_bits, store in the requested byte order
+    }
+
+    if (uDest) { // chroma planes are skipped when dest[1] is NULL
+        for (i = 0; i < chrDstW; i++) {
+            int u = 1 << (26-output_bits - 1); // same rounding term as luma
+            int v = 1 << (26-output_bits - 1);
+            int j;
+
+            for (j = 0; j < chrFilterSize; j++) {
+                u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
+                v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
+            }
+
+            output_pixel(&uDest[i], u);
+            output_pixel(&vDest[i], v);
+        }
+    }
+
+    if (CONFIG_SWSCALE_ALPHA && aDest) { // alpha reuses lumFilter, lumFilterSize and dstW
+        for (i = 0; i < dstW; i++) {
+            int val = 1 << (26-output_bits - 1);
+            int j;
+
+            for (j = 0; j < lumFilterSize; j++)
+                val += (alpSrc[j][i] * lumFilter[j]) >> 1;
+
+            output_pixel(&aDest[i], val);
+        }
+    }
+#undef output_pixel
+}
+
+#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
                              const int16_t **_lumSrc, int lumFilterSize, \
                              const int16_t *chrFilter, const int16_t **_chrUSrc, \
@ -271,21 +323,21 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil
                              int chrFilterSize, const int16_t **_alpSrc, \
                              uint8_t *_dest[4], int dstW, int chrDstW) \
 { \
-    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
-                  **chrUSrc = (const int32_t **) _chrUSrc, \
-                  **chrVSrc = (const int32_t **) _chrVSrc, \
-                  **alpSrc  = (const int32_t **) _alpSrc; \
-    yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
-                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                          alpSrc, (uint16_t **) _dest, \
-                          dstW, chrDstW, is_be, bits); \
-}
-yuv2NBPS( 9, BE, 1);
-yuv2NBPS( 9, LE, 0);
-yuv2NBPS(10, BE, 1);
-yuv2NBPS(10, LE, 0);
-yuv2NBPS(16, BE, 1);
-yuv2NBPS(16, LE, 0);
+    const typeX_t **lumSrc  = (const typeX_t **) _lumSrc, \
+                  **chrUSrc = (const typeX_t **) _chrUSrc, \
+                  **chrVSrc = (const typeX_t **) _chrVSrc, \
+                  **alpSrc  = (const typeX_t **) _alpSrc; \
+    yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
+                         chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                         alpSrc, (uint16_t **) _dest, \
+                         dstW, chrDstW, is_be, bits); \
+}
+yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
+yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
+yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);

 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                       const int16_t **lumSrc, int lumFilterSize,
@ -1857,15 +1909,15 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
    }
 }

-static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
-                       const int16_t *filter,
-                       const int16_t *filterPos, int filterSize)
+static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
+                           const int16_t *filter,
+                           const int16_t *filterPos, int filterSize)
 {
    int i;
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
-    int sh = (bits <= 7) ? 11 : (bits - 4);
+    int sh = bits - 4;

    for (i = 0; i < dstW; i++) {
        int j;
@ -1880,10 +1932,31 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_s
    }
 }

+static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
+                           const int16_t *filter,
+                           const int16_t *filterPos, int filterSize)
+{
+    int i;
+    const uint16_t *src = (const uint16_t *) _src; // input samples are read as 16-bit words
+    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; // source component depth - 1
+
+    for (i = 0; i < dstW; i++) {
+        int j;
+        int srcPos = filterPos[i]; // first input sample used for output pixel i
+        int val = 0;
+
+        for (j = 0; j < filterSize; j++) {
+            val += src[srcPos + j] * filter[filterSize * i + j];
+        }
+        // filter=14 bit, input=(sh+1) bit, sum=(sh+15) bit, >> sh makes 15 bit (16-bit input: 30 bit >> 15)
+        dst[i] = FFMIN(val >> sh, (1 << 15) - 1); // only the upper bound is clamped
+    }
+}
+
 // bilinear / bicubic scaling
-static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
-                     const int16_t *filter, const int16_t *filterPos,
-                     int filterSize)
+static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
+                          const int16_t *filter, const int16_t *filterPos,
+                          int filterSize)
 {
    int i;
    for (i=0; i<dstW; i++) {
@ -1899,6 +1972,25 @@ static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
    }
 }

+static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
+                          const int16_t *filter, const int16_t *filterPos,
+                          int filterSize)
+{
+    int i;
+    int32_t *dst = (int32_t *) _dst; // results are written as int32_t through the int16_t* parameter
+    for (i=0; i<dstW; i++) {
+        int j;
+        int srcPos= filterPos[i]; // first input sample used for output pixel i
+        int val=0;
+        for (j=0; j<filterSize; j++) {
+            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
+        }
+        // 8-bit input * 14-bit filter gives a 22-bit sum, so >> 3 leaves 19 bits
+        dst[i] = FFMIN(val>>3, (1<<19)-1); // upper clamp only: the cubic filter can overshoot
+        // filter is indexed by filterSize*i + j above, so the pointer is never advanced
+    }
+}
+
 //FIXME all pal and rgb srcFormats could do this conversion as well
 //FIXME all scalers more complex than bilinear could do half of this transform
 static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
@ -1978,23 +2070,6 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
    }
 }

-static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len)
-{
-    int i;
-    uint8_t *dst = (uint8_t *) _dst;
-    for (i = len - 1; i >= 0; i--) {
-        dst[i * 2] = dst[i * 2 + 1] = src[i];
-    }
-}
-
-static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len)
-{
-    int i;
-    for (i = 0; i < len; i++) {
-        dst[i] = src[i] >> 4;
-    }
-}
-
 // *** horizontal scale Y line to temp buffer
 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src, int srcW, int xInc,
@ -2011,11 +2086,6 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
        src= formatConvBuffer;
    }

-    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
-        c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW);
-        src = formatConvBuffer;
-    }
-
    if (!c->hyscale_fast) {
        c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
@ -2024,10 +2094,6 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,

    if (convertRange)
        convertRange(dst, dstWidth);
-
-    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
-        c->scale19To15Fw(dst, (int32_t *) dst, dstWidth);
-    }
 }

 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
@ -2052,20 +2118,12 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
                                     uint8_t *formatConvBuffer, uint32_t *pal)
 {
    if (c->chrToYV12) {
-        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
        src2= buf2;
    }

-    if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
-        uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16));
-        c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW);
-        c->scale8To16Rv((uint16_t *) buf2,             src2, srcW);
-        src1 = formatConvBuffer;
-        src2 = buf2;
-    }
-
    if (!c->hcscale_fast) {
        c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
@ -2075,11 +2133,6 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2

    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
-
-    if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) {
-        c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth);
-        c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth);
-    }
 }

 static av_always_inline void
@ -2734,28 +2787,30 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
        }
    }

-    if (c->scalingBpp == 8) {
-    c->hScale       = hScale_c;
-    if (c->flags & SWS_FAST_BILINEAR) {
-        c->hyscale_fast = hyscale_fast_c;
-        c->hcscale_fast = hcscale_fast_c;
-    }
-
-    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
-        if (c->srcRange) {
-            c->lumConvertRange = lumRangeFromJpeg_c;
-            c->chrConvertRange = chrRangeFromJpeg_c;
+    if (c->srcBpc == 8) {
+        if (c->dstBpc <= 10) {
+            c->hScale       = hScale8To15_c;
+            if (c->flags & SWS_FAST_BILINEAR) {
+                c->hyscale_fast = hyscale_fast_c;
+                c->hcscale_fast = hcscale_fast_c;
+            }
        } else {
-            c->lumConvertRange = lumRangeToJpeg_c;
-            c->chrConvertRange = chrRangeToJpeg_c;
+            c->hScale = hScale8To19_c;
        }
-    }
    } else {
-        c->hScale = hScale16_c;
-        c->scale19To15Fw = scale19To15Fw_c;
-        c->scale8To16Rv  = scale8To16Rv_c;
+        c->hScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
+    }

-        if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+        if (c->dstBpc <= 10) {
+            if (c->srcRange) {
+                c->lumConvertRange = lumRangeFromJpeg_c;
+                c->chrConvertRange = chrRangeFromJpeg_c;
+            } else {
+                c->lumConvertRange = lumRangeToJpeg_c;
+                c->chrConvertRange = chrRangeToJpeg_c;
+            }
+        } else {
            if (c->srcRange) {
                c->lumConvertRange = lumRangeFromJpeg16_c;
                c->chrConvertRange = chrRangeFromJpeg16_c;
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@ -64,11 +64,16 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
 * without any additional vertical scaling (or point-scaling).
 *
 * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
- * @param dest    pointer to the 4 output planes (Y/U/V/A)
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param dest    pointer to the 4 output planes (Y/U/V/A). For >8bit
+ *                output, this is in uint16_t
 * @param dstW    width of dest[0], dest[3], lumSrc and alpSrc in pixels
 * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
 */
@ -82,14 +87,19 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c,
 *
 * @param c             SWS scaling context
 * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
 * @param lumFilterSize number of vertical luma/alpha input lines to scale
 * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
 * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
- * @param dest          pointer to the 4 output planes (Y/U/V/A)
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param dest          pointer to the 4 output planes (Y/U/V/A). For >8bit
+ *                      output, this is in uint16_t
 * @param dstW          width of dest[0], dest[3], lumSrc and alpSrc in pixels
 * @param chrDstW       width of dest[1], dest[2], chrUSrc and chrVSrc
 */
@ -105,11 +115,16 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
 * that this function may do chroma scaling, see the "uvalpha" argument.
 *
 * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
- * @param dest    pointer to the output plane
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param dest    pointer to the output plane. For 16bit output, this is
+ *                uint16_t
 * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
 *                to write into dest[]
 * @param uvalpha chroma scaling coefficient for the second line of chroma
@ -132,11 +147,16 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c,  const int16_t *lumSrc,
 * output by doing bilinear scaling between two input lines.
 *
 * @param c       SWS scaling context
- * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
- * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
- * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
- * @param dest    pointer to the output plane
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                19-bit for 16bit output (in int32_t)
+ * @param dest    pointer to the output plane. For 16bit output, this is
+ *                uint16_t
 * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
 *                to write into dest[]
 * @param yalpha  luma/alpha scaling coefficients for the second input line.
@ -160,14 +180,19 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c,  const int16_t *lumSrc[2],
 *
 * @param c             SWS scaling context
 * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
 * @param lumFilterSize number of vertical luma/alpha input lines to scale
 * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
- * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
- * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
 * @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
- * @param dest          pointer to the output plane
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8-10bit output,
+ *                      19-bit for 16bit output (in int32_t)
+ * @param dest          pointer to the output plane. For 16bit output, this is
+ *                      uint16_t
 * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
 *                      to write into dest[]
 * @param y             vertical line number for this output. This does not need
@ -207,7 +232,7 @@ typedef struct SwsContext {
    enum PixelFormat srcFormat;   ///< Source      pixel format.
    int dstFormatBpp;             ///< Number of bits per pixel of the destination pixel format.
    int srcFormatBpp;             ///< Number of bits per pixel of the source      pixel format.
-    int scalingBpp;
+    int dstBpc, srcBpc;
    int chrSrcHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source      image.
    int chrSrcVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in source      image.
    int chrDstHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
@ -431,17 +456,19 @@ typedef struct SwsContext {
     * lines, to produce one (differently sized) line of output data.
     *
     * @param dst        pointer to destination buffer for horizontally scaled
-     *                   data. If the scaling depth (SwsContext->scalingBpp) is
-     *                   8, data will be 15bpp in 16bits (int16_t) width. If
-     *                   scaling depth is 16, data will be 19bpp in 32bpp
-     *                   (int32_t) width.
+     *                   data. If the number of bits per component of one
+     *                   destination pixel (SwsContext->dstBpc) is <= 10, data
+     *                   will be 15bpc in 16bits (int16_t) width. Else (i.e.
+     *                   SwsContext->dstBpc == 16), data will be 19bpc in
+     *                   32bits (int32_t) width.
     * @param dstW       width of destination image
-     * @param src        pointer to source data to be scaled. If scaling depth
-     *                   is 8, this is 8bpp in 8bpp (uint8_t) width. If scaling
-     *                   depth is 16, this is native depth in 16bbp (uint16_t)
-     *                   width. In other words, for 9-bit YUV input, this is
-     *                   9bpp, for 10-bit YUV input, this is 10bpp, and for
-     *                   16-bit RGB or YUV, this is 16bpp.
+     * @param src        pointer to source data to be scaled. If the number of
+     *                   bits per component of a source pixel (SwsContext->srcBpc)
+     *                   is 8, this is 8bpc in 8bits (uint8_t) width. Else
+     *                   (i.e. SwsContext->srcBpc > 8), this is native depth
+     *                   in 16bits (uint16_t) width. In other words, for 9-bit
+     *                   YUV input, this is 9bpc, for 10-bit YUV input, this is
+     *                   10bpc, and for 16-bit RGB or YUV, this is 16bpc.
     * @param filter     filter coefficients to be used per output pixel for
     *                   scaling. This contains 14bpp filtering coefficients.
     *                   Guaranteed to contain dstW * filterSize entries.
@ -461,15 +488,6 @@ typedef struct SwsContext {
    void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
    void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.

-    /**
-     * dst[..] = (src[..] << 8) | src[..];
-     */
-    void (*scale8To16Rv)(uint16_t *dst, const uint8_t *src, int len);
-    /**
-     * dst[..] = src[..] >> 4;
-     */
-    void (*scale19To15Fw)(int16_t *dst, const int32_t *src, int len);
-
    int needs_hcscale; ///< Set if there are chroma planes to be converted.

 } SwsContext;
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@ -853,12 +853,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
        }
    }

-    c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
-                          av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8;
-    if (c->scalingBpp == 16)
+    c->srcBpc = 1 + av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1;
+    if (c->srcBpc < 8)
+        c->srcBpc = 8;
+    c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1;
+    if (c->dstBpc < 8)
+        c->dstBpc = 8;
+    if (c->dstBpc == 16)
        dst_stride <<= 1;
-    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2 * c->scalingBpp >> 3, fail);
-    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) {
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
+                     FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3,
+                     fail);
+    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->srcBpc == 8 && c->dstBpc <= 10) {
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
            if (flags&SWS_PRINT_INFO)
@ -1011,8 +1017,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
        FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+16, fail);
        c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
    }
-    // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
-    c->uv_off_px   = dst_stride_px + 64 / c->scalingBpp;
+    // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate)
+    c->uv_off_px   = dst_stride_px + 64 / (c->dstBpc &~ 7);
    c->uv_off_byte = dst_stride + 16;
    for (i=0; i<c->vChrBufSize; i++) {
        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail);
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@ -2316,7 +2316,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
        }
    }

-    if (c->scalingBpp == 8) {
+    if (c->srcBpc == 8 && c->dstBpc <= 10) {
 #if !COMPILE_TEMPLATE_MMX2
    c->hScale       = RENAME(hScale      );
 #endif /* !COMPILE_TEMPLATE_MMX2 */
--- a/tests/ref/lavfi/pixdesc
+++ b/tests/ref/lavfi/pixdesc
@ -1,8 +1,8 @@
 abgr                037bf9df6a765520ad6d490066bf4b89
 argb                c442a8261c2265a07212ef0f72e35f5a
 bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
-bgr48be             74dedaaacae8fd1ef46e05f78cf29d62
-bgr48le             0eb7d30801eac6058814bddd330b3c76
+bgr48be             00624e6c7ec7ab19897ba2f0a3257fe8
+bgr48le             d02c235ebba7167881ca2d576497ff84
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
 bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
@ -18,8 +18,8 @@ monow               9251497f3b0634f1165d12d5a289d943
 nv12                e0af357888584d36eec5aa0f673793ef
 nv21                9a3297f3b34baa038b1f37cb202b512f
 rgb24               b41eba9651e1b5fe386289b506188105
-rgb48be             e3bc84c9af376fb6d0f0293cc7b713a6
-rgb48le             f51c0e71638a822458329abb2f4052c7
+rgb48be             cc139ec1dd9451f0e049c0cb3a0c8aa2
+rgb48le             86c5608904f75360d492dbc5c9589969
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
 rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
@ -38,14 +38,14 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
-yuv422p10le         3f478be644add24b6cc77e718a6e2afa
-yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
-yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
+yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
+yuv422p10le         d0607c260a45c973e6639f4e449730ad
+yuv422p16be         4e9b3b3467aeebb6a528cee5966800ed
+yuv422p16le         f87c81bf16916b64d201359be0b4b6f4
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
 yuv444p             0a98447b78fd476aa39686da6a74fa2e
-yuv444p16be         af555dbaa401b142a995566864f47545
-yuv444p16le         a803e8016997dad95c5b2a72f54c34d6
+yuv444p16be         3ad639fff73e56f3b09dd20c335478d6
+yuv444p16le         8a7e66dc91ab7971fd24a9105ff2699b
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
 yuvj420p            32eec78ba51857b16ce9b813a49b7189
 yuvj422p            0dfa0ed434f73be51428758c69e082cb
--- a/tests/ref/lavfi/pixfmts_copy
+++ b/tests/ref/lavfi/pixfmts_copy
@ -1,8 +1,8 @@
 abgr                037bf9df6a765520ad6d490066bf4b89
 argb                c442a8261c2265a07212ef0f72e35f5a
 bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
-bgr48be             74dedaaacae8fd1ef46e05f78cf29d62
-bgr48le             0eb7d30801eac6058814bddd330b3c76
+bgr48be             00624e6c7ec7ab19897ba2f0a3257fe8
+bgr48le             d02c235ebba7167881ca2d576497ff84
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
 bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
@ -18,8 +18,8 @@ monow               9251497f3b0634f1165d12d5a289d943
 nv12                e0af357888584d36eec5aa0f673793ef
 nv21                9a3297f3b34baa038b1f37cb202b512f
 rgb24               b41eba9651e1b5fe386289b506188105
-rgb48be             e3bc84c9af376fb6d0f0293cc7b713a6
-rgb48le             f51c0e71638a822458329abb2f4052c7
+rgb48be             cc139ec1dd9451f0e049c0cb3a0c8aa2
+rgb48le             86c5608904f75360d492dbc5c9589969
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
 rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
@ -38,14 +38,14 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
-yuv422p10le         3f478be644add24b6cc77e718a6e2afa
-yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
-yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
+yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
+yuv422p10le         d0607c260a45c973e6639f4e449730ad
+yuv422p16be         4e9b3b3467aeebb6a528cee5966800ed
+yuv422p16le         f87c81bf16916b64d201359be0b4b6f4
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
 yuv444p             0a98447b78fd476aa39686da6a74fa2e
-yuv444p16be         af555dbaa401b142a995566864f47545
-yuv444p16le         a803e8016997dad95c5b2a72f54c34d6
+yuv444p16be         3ad639fff73e56f3b09dd20c335478d6
+yuv444p16le         8a7e66dc91ab7971fd24a9105ff2699b
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
 yuvj420p            32eec78ba51857b16ce9b813a49b7189
 yuvj422p            0dfa0ed434f73be51428758c69e082cb
--- a/tests/ref/lavfi/pixfmts_crop
+++ b/tests/ref/lavfi/pixfmts_crop
@ -1,8 +1,8 @@
 abgr                cd761690872843d1b7ab0c695393c751
 argb                2ec6ef18769bcd651c2e8904d5a3ee67
 bgr24               3450fd00cf1493d1ded75544d82ba3ec
-bgr48be             a9a7d177cef0914d3f1d266f00dff676
-bgr48le             b475d1b529ed80c728ddbacd22d35281
+bgr48be             18ca4002732f278cc9f525215c2fca41
+bgr48le             395a4c187c4e95217d089bd3df9f3654
 bgr4_byte           2f6ac3cdd4676ab4e2982bdf0664945b
 bgr555be            d3a7c273604723adeb7e5f5dd1c4272b
 bgr555le            d22442fc13b464f9ba455b08df4e981f
@ -14,8 +14,8 @@ gray                8c4850e66562a587a292dc728a65ea4a
 gray16be            daa5a6b98fb4a280c57c57bff1a2ab5a
 gray16le            84f5ea7259073edcb893113b42213c8e
 rgb24               3b90ed64b687d3dc186c6ef521dc71a8
-rgb48be             b8f9fd6aaa24d75275ee2f8b8a7b9e55
-rgb48le             3e52e831a040f086c3ae983241172cce
+rgb48be             e6fd353c0eb9bea889423954414bea35
+rgb48le             68a1723da11ce08b502d42e204376503
 rgb4_byte           6958029f73c6cdfed4f71020d816f027
 rgb555be            41a7d1836837bc90f2cae19a9c9df3b3
 rgb555le            eeb78f8ce6186fba55c941469e60ba67
@ -29,12 +29,12 @@ yuv420p             bfea0188ddd4889787c403caae119cc7
 yuv420p16be         8365eff38b8c329aeb95fc605fa229bb
 yuv420p16le         5e8dd38d973d5854abe1ad4efad20cc1
 yuv422p             f2f930a91fe00d4252c4720b5ecd8961
-yuv422p16be         93f9b6f33f9529db6de6a9f0ddd70eb5
-yuv422p16le         2e66dcfec54ca6b57aa4bbd9ac234639
+yuv422p16be         167e4338811a7d272925a4c6417d60da
+yuv422p16le         3359395d5875d581fa1e975013d30114
 yuv440p             2472417d980e395ad6843cbb8b633b29
 yuv444p             1f151980486848c96bc5585ced99003e
-yuv444p16be         e7d1ecf0c11a41b5db192f761f55bd3c
-yuv444p16le         3298a0043d982e7cf1a33a1292fa11f0
+yuv444p16be         5d0c0ea66ab43c0c590d8c2a9256e43f
+yuv444p16le         3c0a747c1b64feb0ab8dfba92f92579a
 yuva420p            7536753dfbc7932560fb50c921369a0e
 yuvj420p            21f891093006d42d7683b0e1d773a657
 yuvj422p            9a43d474c407590ad8f213880586b45e
--- a/tests/ref/lavfi/pixfmts_hflip
+++ b/tests/ref/lavfi/pixfmts_hflip
@ -1,8 +1,8 @@
 abgr                49468c6c9ceee5d52b08b1270a909323
 argb                50ba9f16c6475530602f2983278b82d0
 bgr24               cc53d2011d097972db0d22756c3699e3
-bgr48be             90374bc92471f1bd4931d71ef8b73f50
-bgr48le             696f628d0dd32121e60a0d61ac47d6e6
+bgr48be             815192d3757c66de97b0d51818acbe0f
+bgr48le             8e4184ac6eae251b4bace51dba7d790c
 bgr4_byte           aac987e7d1a6a96477cfc0b48a4285de
 bgr555be            bc07265898440116772200390d70c092
 bgr555le            ccee08679bac84a1f960c6c9070c5538
@ -14,8 +14,8 @@ gray                03efcb4ab52a24c0af0e03cfd26c9377
 gray16be            9bcbca979601ddc4869f846f08f3d1dd
 gray16le            c1b8965adcc7f847ee343149ff507073
 rgb24               754f1722fc738590cc407ac65749bfe8
-rgb48be             2397b9d3c296ac15f8a2325a703f81c7
-rgb48le             527043c72546d8b4bb1ce2dea4b294c3
+rgb48be             d690412ca5fada031b5da47b87096248
+rgb48le             c901feb564232f5d0bc0eabd66dae3e7
 rgb4_byte           c8a3f995fcf3e0919239ea2c413ddc29
 rgb555be            045ce8607d3910586f4d97481dda8632
 rgb555le            8778ee0cf58ce9ad1d99a1eca9f95e87
@ -29,12 +29,12 @@ yuv420p             2d5c80f9ba2ddd85b2aeda3564cc7d64
 yuv420p16be         758b0c1e2113b15e7afde48da4e4d024
 yuv420p16le         480ccd951dcb806bc875d307e02e50a0
 yuv422p             6e728f4eb9eae287c224f396d84be6ea
-yuv422p16be         8657d2c8d443940300fdb4028d555631
-yuv422p16le         4ab27609981e50de5b1150125718ae76
+yuv422p16be         69cf0605496c321546899a8442ee64fb
+yuv422p16le         f0b443fea72f4b6f462859a73b159664
 yuv440p             a99e2b57ed601f39852715c9d675d0d3
 yuv444p             947e47f7bb5fdccc659d19b7df2b6fc3
-yuv444p16be         a5154ce329db0d2caf0bd43f1347dba3
-yuv444p16le         1f703308b90feb048191b3bccc695671
+yuv444p16be         bc7d53923cff1d7e98d24540845fb64b
+yuv444p16le         5df206a93f85ef8b77f5bdc81d9b0a0b
 yuva420p            d83ec0c01498189f179ec574918185f1
 yuvj420p            df3aaaec3bb157c3bde5f0365af30f4f
 yuvj422p            d113871528d510a192797af59df9c05c
--- a/tests/ref/lavfi/pixfmts_null
+++ b/tests/ref/lavfi/pixfmts_null
@ -1,8 +1,8 @@
 abgr                037bf9df6a765520ad6d490066bf4b89
 argb                c442a8261c2265a07212ef0f72e35f5a
 bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
-bgr48be             74dedaaacae8fd1ef46e05f78cf29d62
-bgr48le             0eb7d30801eac6058814bddd330b3c76
+bgr48be             00624e6c7ec7ab19897ba2f0a3257fe8
+bgr48le             d02c235ebba7167881ca2d576497ff84
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
 bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
@ -18,8 +18,8 @@ monow               9251497f3b0634f1165d12d5a289d943
 nv12                e0af357888584d36eec5aa0f673793ef
 nv21                9a3297f3b34baa038b1f37cb202b512f
 rgb24               b41eba9651e1b5fe386289b506188105
-rgb48be             e3bc84c9af376fb6d0f0293cc7b713a6
-rgb48le             f51c0e71638a822458329abb2f4052c7
+rgb48be             cc139ec1dd9451f0e049c0cb3a0c8aa2
+rgb48le             86c5608904f75360d492dbc5c9589969
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
 rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
@ -38,14 +38,14 @@ yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
 yuv420p9be          ce880fa07830e5297c22acf6e20555ce
 yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         107c6e31a3d4d598bca1d8426aaa54f5
-yuv422p10le         3f478be644add24b6cc77e718a6e2afa
-yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
-yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
+yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
+yuv422p10le         d0607c260a45c973e6639f4e449730ad
+yuv422p16be         4e9b3b3467aeebb6a528cee5966800ed
+yuv422p16le         f87c81bf16916b64d201359be0b4b6f4
 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
 yuv444p             0a98447b78fd476aa39686da6a74fa2e
-yuv444p16be         af555dbaa401b142a995566864f47545
-yuv444p16le         a803e8016997dad95c5b2a72f54c34d6
+yuv444p16be         3ad639fff73e56f3b09dd20c335478d6
+yuv444p16le         8a7e66dc91ab7971fd24a9105ff2699b
 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
 yuvj420p            32eec78ba51857b16ce9b813a49b7189
 yuvj422p            0dfa0ed434f73be51428758c69e082cb
--- a/tests/ref/lavfi/pixfmts_scale
+++ b/tests/ref/lavfi/pixfmts_scale
@ -1,8 +1,8 @@
 abgr                d894cb97f6c80eb21bdbe8a4eea62d86
 argb                54346f2b2eef10919e0f247241df3b24
 bgr24               570f8d6b51a838aed022ef67535f6bdc
-bgr48be             07f7a0cc34feb3646434d47c0cec8cee
-bgr48le             9abd2c3a66088e6c9078232064eba61e
+bgr48be             390d3058a12a99c2b153ed7922508bea
+bgr48le             39fe06feb4ec1d9730dccc04a0cfac4c
 bgr4_byte           ee1d35a7baf8e9016891929a2f565c0b
 bgr555be            de8901c1358834fddea060fcb3a67beb
 bgr555le            36b745067197f9ca8c1731cac51329c9
@ -18,8 +18,8 @@ monow               d31772ebaa877fc2a78565937f7f9673
 nv12                4676d59db43d657dc12841f6bc3ab452
 nv21                69c699510ff1fb777b118ebee1002f14
 rgb24               514692e28e8ff6860e415ce4fcf6eb8c
-rgb48be             f18841c19fc6d9c817a3095f557b9bc5
-rgb48le             819e7b8acd8965ba57ba46198a5cc9bf
+rgb48be             8fac63787a711886030f8e056872b488
+rgb48le             ab92f2763a2eb264c3870cc758f97149
 rgb4_byte           d81ffd3add95842a618eec81024f0b5c
 rgb555be            4607309f9f217d51cbb53d13b84b4537
 rgb555le            a350ef1dc2c9688ed49e7ba018843795
@ -31,21 +31,21 @@ uyvy422             314bd486277111a95d9369b944fa0400
 yuv410p             7df8f6d69b56a8dcb6c7ee908e5018b5
 yuv411p             1143e7c5cc28fe0922b051b17733bc4c
 yuv420p             fdad2d8df8985e3d17e73c71f713cb14
-yuv420p10be         af5429f27b9f95bf955e795921c65cdc
-yuv420p10le         d0b47e6a8a44e6b5ca0fe4349a4e393b
+yuv420p10be         d7695b9117d5b52819c569459e42669b
+yuv420p10le         0ac6d448db2df5f3d1346aa81f2b5f50
 yuv420p16be         9688e33e03b8c8275ab2fb1df0f06bee
 yuv420p16le         cba8b390ad5e7b8678e419b8ce79c008
-yuv420p9be          a073b2d93b2a7dce2069ba252bc43175
-yuv420p9le          b67233c3c7d93763d07d88f697c145e1
+yuv420p9be          8fa6e007b1a40f34eaa3e2beb73ea8af
+yuv420p9le          a7b131a7dd06906a5aef2e36d117b972
 yuv422p             918e37701ee7377d16a8a6c119c56a40
-yuv422p10be         533fd21e7943c20a1026b19069b3b867
-yuv422p10le         59b20a4a8609f5da2dc54c78aea11e6c
-yuv422p16be         2cf502d7d386db1f1b3b946679d897b1
-yuv422p16le         3002a4e47520731dcee5929aff49eb74
+yuv422p10be         35206fcd7e00ee582a8c366b37d57d1d
+yuv422p10le         396f930e2da02f149ab9dd5b781cbe8d
+yuv422p16be         285993ee0c0f4f8e511ee46f93c5f38c
+yuv422p16le         61bfcee8e54465f760164f5a75d40b5e
 yuv440p             461503fdb9b90451020aa3b25ddf041c
 yuv444p             81b2eba962d12e8d64f003ac56f6faf2
-yuv444p16be         b9f051ce7335923fe33efd162e48da1d
-yuv444p16le         fa47e317efac988b4a7fa55141c89126
+yuv444p16be         2677f3074d255f9dab625e9e2e092ca5
+yuv444p16le         65fa92521ef97088599ea83f9508cd5b
 yuva420p            8673a9131fb47de69788863f93a50eb7
 yuvj420p            30427bd6caf5bda93a173dbebe759e09
 yuvj422p            fc8288f64fd149573f73cf8da05d8e6d
--- a/tests/ref/lavfi/pixfmts_vflip
+++ b/tests/ref/lavfi/pixfmts_vflip
@ -1,8 +1,8 @@
 abgr                25e72e9dbd01ab00727c976d577f7be5
 argb                19869bf1a5ac0b6af4d8bbe2c104533c
 bgr24               89108a4ba00201f79b75b9305c42352d
-bgr48be             908b4edb525fd154a95a3744c4ab5420
-bgr48le             796c2072d6fa13a091f5c5b175417ed5
+bgr48be             2f23931844f57641f3737348182d118c
+bgr48le             4242a026012b6c135a6aa138a6d67031
 bgr4_byte           407fcf564ed764c38e1d748f700ab921
 bgr555be            f739d2519f7e9d494359bf67a3821537
 bgr555le            bd7b3ec4d684dfad075d89a606cb8b74
@ -18,8 +18,8 @@ monow               ff9869d067ecb94eb9d90c9750c31fea
 nv12                046f00f598ce14d9854a3534a5c99114
 nv21                01ea369dd2d0d3ed7451dc5c8d61497f
 rgb24               eaefabc168d0b14576bab45bc1e56e1e
-rgb48be             8e347deca2902e7dc1ece261322577d8
-rgb48le             2034e485f946e4064b5fb9be09865e55
+rgb48be             62dd185862ed142283bd300eb6dbd216
+rgb48le             dcb76353268bc5862194d131762220da
 rgb4_byte           8c6ff02df0b06dd2d574836c3741b2a2
 rgb555be            40dc33cfb5cf56aac1c5a290ac486c36
 rgb555le            4f8eaad29a17e0f8e9d8ab743e76b999
@ -38,14 +38,14 @@ yuv420p16le         0f609e588e5a258644ef85170d70e030
 yuv420p9be          be40ec975fb2873891643cbbbddbc3b0
 yuv420p9le          7e606310d3f5ff12badf911e8f333471
 yuv422p             d7f5cb44d9b0210d66d6a8762640ab34
-yuv422p10be         a28b051168af49435c04af5f58dce47b
-yuv422p10le         35936ffff30df2697f47b9b8d2cb7dea
-yuv422p16be         51d9aa4e78d121c226d919ce97976fe4
-yuv422p16le         12965c54bda8932ca72da194419a9908
+yuv422p10be         588fe319b96513c32e21d3e32b45447f
+yuv422p10le         11b57f2bd9661024153f3973b9090cdb
+yuv422p16be         c092d083548c2a144c372a98c46875c7
+yuv422p16le         c071b9397a416d51cbe339345cbcba84
 yuv440p             876385e96165acf51271b20e5d85a416
 yuv444p             9c3c667d1613b72d15bc6d851c5eb8f7
-yuv444p16be         6502abd75030d462c58d99a8673ec517
-yuv444p16le         cd7e88b6d08425450a57555bc86ab210
+yuv444p16be         6a954614fd2a8ae0df53e4fd76937af8
+yuv444p16le         65613965fb58cc4c3cd480a68b6540ea
 yuva420p            c705d1cf061d8c6580ac690b55f92276
 yuvj420p            41fd02b204da0ab62452cd14b595e2e4
 yuvj422p            7f6ca9bc1812cde02036d7d29a7cce43
--- a/tests/ref/vsynth1/dnxhd_720p_10bit
+++ b/tests/ref/vsynth1/dnxhd_720p_10bit
@ -1,4 +1,4 @@
-3ed972af47641d39a19916b0cd119120 *./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
-2293760 ./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
-b64efb0b4eb934bb66f4530c12d5d7fa *./tests/data/dnxhd_720p_10bit.vsynth1.out.yuv
-stddev:    6.27 PSNR: 32.18 MAXDIFF:   65 bytes:   760320/  7603200
+cb29b6ae4e1562d95f9311991fef98df *./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
+ 2293760 ./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
+2f45bb1af7da5dd3dca870ac87237b7d *./tests/data/dnxhd_720p_10bit.vsynth1.out.yuv
+stddev:    6.27 PSNR: 32.18 MAXDIFF:   64 bytes:   760320/  7603200
--- a/tests/ref/vsynth2/dnxhd_720p_10bit
+++ b/tests/ref/vsynth2/dnxhd_720p_10bit
@ -1,4 +1,4 @@
-0b8389955cce583bd2db7d2e727a6f15 *./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
-2293760 ./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
-bde04e992df2473e89aef4460265332d *./tests/data/dnxhd_720p_10bit.vsynth2.out.yuv
-stddev:    1.45 PSNR: 44.89 MAXDIFF:   22 bytes:   760320/  7603200
+8648511257afb816b5b911706ca391db *./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
+ 2293760 ./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
+391b6f5aa7c7b488b479cb43d420b860 *./tests/data/dnxhd_720p_10bit.vsynth2.out.yuv
+stddev:    1.35 PSNR: 45.46 MAXDIFF:   23 bytes:   760320/  7603200