libswscale: Re-factor ff_shuffle_filter_coefficients.

Make the code more readable and follow the style guide.

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
3 years ago · e534d98af3
parent f1a5414c97
commit e534d98af3
1 changed file with 36 additions and 30 deletions
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -278,42 +278,48 @@ static const FormatEntry format_entries[] = {
    [AV_PIX_FMT_P416LE]      = { 1, 1 },
 };
-int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){
+int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
                                   int filterSize, int16_t *filter,
                                   int dstW)
 {
 #if ARCH_X86_64
-    int i, j, k, l;
+    int i, j, k;
    int cpu_flags = av_get_cpu_flags();
    // avx2 hscale filter processes 16 pixel blocks.
    if (!filter || dstW % 16 != 0)
        return 0;
    if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
-        if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
+        if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
-            if (dstW % 16 == 0){
+           int16_t *filterCopy = NULL;
-                if (filter != NULL){
+           if (filterSize > 4) {
-                    for (i = 0; i < dstW; i += 8){
+               if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
-                        FFSWAP(int, filterPos[i + 2], filterPos[i+4]);
+                   return AVERROR(ENOMEM);
-                        FFSWAP(int, filterPos[i + 3], filterPos[i+5]);
+               memcpy(filterCopy, filter, dstW * filterSize * sizeof(int16_t));
-                    }
+           }
-                    if (filterSize > 4){
+           // Do not swap filterPos for pixels which won't be processed by
-                        int16_t *tmp2 = av_malloc(dstW * filterSize * 2);
+           // the main loop.
-                        if (!tmp2)
+           for (i = 0; i + 8 <= dstW; i += 8) {
-                            return AVERROR(ENOMEM);
+               FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
-                        memcpy(tmp2, filter, dstW * filterSize * 2);
+               FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
-                        for (i = 0; i < dstW; i += 16){//pixel
+           }
-                            for (k = 0; k < filterSize / 4; ++k){//fcoeff
+           if (filterSize > 4) {
-                                for (j = 0; j < 16; ++j){//inner pixel
+               // 16 pixels are processed at a time.
-                                    for (l = 0; l < 4; ++l){//coeff
+               for (i = 0; i + 16 <= dstW; i += 16) {
-                                        int from = i * filterSize + j * filterSize + k * 4 + l;
+                   // 4 filter coeffs are processed at a time.
-                                        int to = (i) * filterSize + j * 4 + l + k * 64;
+                   for (k = 0; k + 4 <= filterSize; k += 4) {
-                                        filter[to] = tmp2[from];
+                       for (j = 0; j < 16; ++j) {
-                                    }
+                           int from = (i + j) * filterSize + k;
-                                }
+                           int to = i * filterSize + j * 4 + k * 16;
-                            }
+                           memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t));
-                        }
+                       }
-                        av_free(tmp2);
+                   }
-                    }
+               }
-                }
+           }
-            }
+           av_free(filterCopy);
        }
    }
    return 0;
 #endif
    return 0;
 }
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)