From dcb4cabb26401ad0da2b9f5b0c5296fdc2a63dbf Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 1 Jun 2021 19:48:49 +0000 Subject: [PATCH] 3rdparty: libjpeg-turbo 2.0.6 => 2.1.0 https://github.com/libjpeg-turbo/libjpeg-turbo/releases/tag/2.1.0 --- 3rdparty/libjpeg-turbo/CMakeLists.txt | 7 +- 3rdparty/libjpeg-turbo/LICENSE.md | 2 +- 3rdparty/libjpeg-turbo/README.ijg | 15 +- 3rdparty/libjpeg-turbo/README.md | 2 +- 3rdparty/libjpeg-turbo/jconfig.h.in | 5 - 3rdparty/libjpeg-turbo/jconfig.h.win.in | 1 - 3rdparty/libjpeg-turbo/src/jccolext.c | 18 +- 3rdparty/libjpeg-turbo/src/jccolor.c | 12 +- 3rdparty/libjpeg-turbo/src/jcdctmgr.c | 37 ++- 3rdparty/libjpeg-turbo/src/jchuff.c | 386 +++++++++++++----------- 3rdparty/libjpeg-turbo/src/jcphuff.c | 25 +- 3rdparty/libjpeg-turbo/src/jcsample.c | 63 ++-- 3rdparty/libjpeg-turbo/src/jdapistd.c | 10 +- 3rdparty/libjpeg-turbo/src/jdarith.c | 15 +- 3rdparty/libjpeg-turbo/src/jdcoefct.c | 290 ++++++++++++++---- 3rdparty/libjpeg-turbo/src/jdcoefct.h | 3 +- 3rdparty/libjpeg-turbo/src/jdcol565.c | 96 +++--- 3rdparty/libjpeg-turbo/src/jdcolext.c | 8 +- 3rdparty/libjpeg-turbo/src/jdcolor.c | 14 +- 3rdparty/libjpeg-turbo/src/jdhuff.c | 60 ++-- 3rdparty/libjpeg-turbo/src/jdhuff.h | 13 +- 3rdparty/libjpeg-turbo/src/jdicc.c | 32 +- 3rdparty/libjpeg-turbo/src/jdmarker.c | 67 ++-- 3rdparty/libjpeg-turbo/src/jdmaster.c | 15 +- 3rdparty/libjpeg-turbo/src/jdmrg565.c | 68 ++--- 3rdparty/libjpeg-turbo/src/jdmrgext.c | 34 +-- 3rdparty/libjpeg-turbo/src/jdphuff.c | 48 ++- 3rdparty/libjpeg-turbo/src/jdsample.c | 38 ++- 3rdparty/libjpeg-turbo/src/jerror.h | 13 + 3rdparty/libjpeg-turbo/src/jidctint.c | 8 +- 3rdparty/libjpeg-turbo/src/jmorecfg.h | 35 --- 3rdparty/libjpeg-turbo/src/jpegint.h | 5 +- 3rdparty/libjpeg-turbo/src/jquant1.c | 27 +- 3rdparty/libjpeg-turbo/src/jquant2.c | 46 +-- 3rdparty/libjpeg-turbo/src/jsimd.h | 6 + 3rdparty/libjpeg-turbo/src/jsimd_none.c | 13 + 3rdparty/libjpeg-turbo/src/jversion.h | 12 +- 37 files changed, 871 insertions(+), 678 deletions(-) diff --git a/3rdparty/libjpeg-turbo/CMakeLists.txt b/3rdparty/libjpeg-turbo/CMakeLists.txt index 901669a4a8..3c7f29b08e 100644 --- a/3rdparty/libjpeg-turbo/CMakeLists.txt +++ b/3rdparty/libjpeg-turbo/CMakeLists.txt @@ -3,10 +3,10 @@ project(${JPEG_LIBRARY} C) ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-64-to-32 -Wimplicit-fallthrough) set(VERSION_MAJOR 2) -set(VERSION_MINOR 0) -set(VERSION_REVISION 6) +set(VERSION_MINOR 1) +set(VERSION_REVISION 0) set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION}) -set(LIBJPEG_TURBO_VERSION_NUMBER 2000006) +set(LIBJPEG_TURBO_VERSION_NUMBER 2001000) string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo") if(CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -46,7 +46,6 @@ if(UNIX) ocv_update(HAVE_UNSIGNED_SHORT 1) # undef INCOMPLETE_TYPES_BROKEN ocv_update(RIGHT_SHIFT_IS_UNSIGNED 0) - ocv_update(__CHAR_UNSIGNED__ 0) endif() diff --git a/3rdparty/libjpeg-turbo/LICENSE.md b/3rdparty/libjpeg-turbo/LICENSE.md index 99c9aadcc4..a1cdad52fa 100644 --- a/3rdparty/libjpeg-turbo/LICENSE.md +++ b/3rdparty/libjpeg-turbo/LICENSE.md @@ -91,7 +91,7 @@ best of our understanding. The Modified (3-clause) BSD License =================================== -Copyright (C)2009-2020 D. R. Commander. All Rights Reserved. +Copyright (C)2009-2021 D. R. Commander. All Rights Reserved.
Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. Redistribution and use in source and binary forms, with or without diff --git a/3rdparty/libjpeg-turbo/README.ijg b/3rdparty/libjpeg-turbo/README.ijg index d681cf1273..9453c19501 100644 --- a/3rdparty/libjpeg-turbo/README.ijg +++ b/3rdparty/libjpeg-turbo/README.ijg @@ -128,7 +128,7 @@ with respect to this software, its quality, accuracy, merchantability, or fitness for a particular purpose. This software is provided "AS IS", and you, its user, assume the entire risk as to its quality and accuracy. -This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding. +This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding. All Rights Reserved except as specified below. Permission is hereby granted to use, copy, modify, and distribute this @@ -159,19 +159,6 @@ commercial products, provided that all warranty or liability claims are assumed by the product vendor. -The IJG distribution formerly included code to read and write GIF files. -To avoid entanglement with the Unisys LZW patent (now expired), GIF reading -support has been removed altogether, and the GIF writer has been simplified -to produce "uncompressed GIFs". This technique does not use the LZW -algorithm; the resulting GIF files are larger than usual, but are readable -by all standard GIF decoders. - -We are required to state that - "The Graphics Interchange Format(c) is the Copyright property of - CompuServe Incorporated. GIF(sm) is a Service Mark property of - CompuServe Incorporated." - - REFERENCES ========== diff --git a/3rdparty/libjpeg-turbo/README.md b/3rdparty/libjpeg-turbo/README.md index 90a4a43ee1..01e391ea7c 100644 --- a/3rdparty/libjpeg-turbo/README.md +++ b/3rdparty/libjpeg-turbo/README.md @@ -3,7 +3,7 @@ Background libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and -MIPS systems, as well as progressive JPEG compression on x86 and x86-64 +MIPS systems, as well as progressive JPEG compression on x86, x86-64, and Arm systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized diff --git a/3rdparty/libjpeg-turbo/jconfig.h.in b/3rdparty/libjpeg-turbo/jconfig.h.in index 18a69a4814..d4284d97b8 100644 --- a/3rdparty/libjpeg-turbo/jconfig.h.in +++ b/3rdparty/libjpeg-turbo/jconfig.h.in @@ -61,11 +61,6 @@ unsigned. */ #cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1 -/* Define to 1 if type `char' is unsigned and you are not using gcc. */ -#ifndef __CHAR_UNSIGNED__ - #cmakedefine __CHAR_UNSIGNED__ 1 -#endif - /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ diff --git a/3rdparty/libjpeg-turbo/jconfig.h.win.in b/3rdparty/libjpeg-turbo/jconfig.h.win.in index 6db0b345b2..13cceef01d 100644 --- a/3rdparty/libjpeg-turbo/jconfig.h.win.in +++ b/3rdparty/libjpeg-turbo/jconfig.h.win.in @@ -18,7 +18,6 @@ #define HAVE_UNSIGNED_SHORT #undef INCOMPLETE_TYPES_BROKEN #undef RIGHT_SHIFT_IS_UNSIGNED -#undef __CHAR_UNSIGNED__ /* Define "boolean" as unsigned char, not int, per Windows custom */ #ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */ diff --git a/3rdparty/libjpeg-turbo/src/jccolext.c b/3rdparty/libjpeg-turbo/src/jccolext.c index 19c955c9d6..303b322ce6 100644 --- a/3rdparty/libjpeg-turbo/src/jccolext.c +++ b/3rdparty/libjpeg-turbo/src/jccolext.c @@ -48,9 +48,9 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr2 = output_buf[2][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); + r = inptr[RGB_RED]; + g = inptr[RGB_GREEN]; + b = inptr[RGB_BLUE]; inptr += RGB_PIXELSIZE; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. @@ -100,9 +100,9 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); + r = inptr[RGB_RED]; + g = inptr[RGB_GREEN]; + b = inptr[RGB_BLUE]; inptr += RGB_PIXELSIZE; /* Y */ outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + @@ -135,9 +135,9 @@ rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr2 = output_buf[2][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - outptr0[col] = GETJSAMPLE(inptr[RGB_RED]); - outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]); - outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]); + outptr0[col] = inptr[RGB_RED]; + outptr1[col] = inptr[RGB_GREEN]; + outptr2[col] = inptr[RGB_BLUE]; inptr += RGB_PIXELSIZE; } } diff --git a/3rdparty/libjpeg-turbo/src/jccolor.c b/3rdparty/libjpeg-turbo/src/jccolor.c index 036f6016d1..bdc563c723 100644 --- a/3rdparty/libjpeg-turbo/src/jccolor.c +++ b/3rdparty/libjpeg-turbo/src/jccolor.c @@ -392,11 +392,11 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr3 = output_buf[3][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - r = MAXJSAMPLE - GETJSAMPLE(inptr[0]); - g = MAXJSAMPLE - GETJSAMPLE(inptr[1]); - b = MAXJSAMPLE - GETJSAMPLE(inptr[2]); + r = MAXJSAMPLE - inptr[0]; + g = MAXJSAMPLE - inptr[1]; + b = MAXJSAMPLE - inptr[2]; /* K passes through as-is */ - outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ + outptr3[col] = inptr[3]; inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. @@ -438,7 +438,7 @@ grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */ + outptr[col] = inptr[0]; inptr += instride; } } @@ -497,7 +497,7 @@ null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, inptr = *input_buf; outptr = output_buf[ci][output_row]; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ + outptr[col] = inptr[ci]; inptr += nc; } } diff --git a/3rdparty/libjpeg-turbo/src/jcdctmgr.c b/3rdparty/libjpeg-turbo/src/jcdctmgr.c index c04058e6ce..7dae17a6e1 100644 --- a/3rdparty/libjpeg-turbo/src/jcdctmgr.c +++ b/3rdparty/libjpeg-turbo/src/jcdctmgr.c @@ -381,19 +381,19 @@ convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) elemptr = sample_data[elemr] + start_col; #if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; #else { register int elemc; for (elemc = DCTSIZE; elemc > 0; elemc--) - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; } #endif } @@ -533,20 +533,19 @@ convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col; #if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); #else { register int elemc; for (elemc = DCTSIZE; elemc > 0; elemc--) - *workspaceptr++ = (FAST_FLOAT) - (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); } #endif } diff --git a/3rdparty/libjpeg-turbo/src/jchuff.c b/3rdparty/libjpeg-turbo/src/jchuff.c index db85ce114f..2bce767ebd 100644 --- a/3rdparty/libjpeg-turbo/src/jchuff.c +++ b/3rdparty/libjpeg-turbo/src/jchuff.c @@ -4,8 +4,10 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander. + * Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander. * Copyright (C) 2015, Matthieu Darbois. + * Copyright (C) 2018, Matthias Räncker. + * Copyright (C) 2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -42,15 +44,19 @@ * flags (this defines __thumb__). */ -/* NOTE: Both GCC and Clang define __GNUC__ */ -#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__)) +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \ + defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif #endif #ifdef USE_CLZ_INTRINSIC +#if defined(_MSC_VER) && !defined(__clang__) +#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x)) +#else #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#endif #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) #else #include "jpeg_nbits_table.h" @@ -65,31 +71,42 @@ * but must not be updated permanently until we complete the MCU. */ -typedef struct { - size_t put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ -} savable_state; +#if defined(__x86_64__) && defined(__ILP32__) +typedef unsigned long long bit_buf_type; +#else +typedef size_t bit_buf_type; +#endif -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. +/* NOTE: The more optimal Huffman encoding algorithm is only used by the + * intrinsics implementation of the Arm Neon SIMD extensions, which is why we + * retain the old Huffman encoder behavior when using the GAS implementation. */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest, src) ((dest) = (src)) +#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \ + defined(_M_ARM) || defined(_M_ARM64)) +typedef unsigned long long simd_bit_buf_type; #else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest, src) \ - ((dest).put_buffer = (src).put_buffer, \ - (dest).put_bits = (src).put_bits, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) +typedef bit_buf_type simd_bit_buf_type; #endif + +#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \ + (defined(__x86_64__) && defined(__ILP32__)) +#define BIT_BUF_SIZE 64 +#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32) +#define BIT_BUF_SIZE 32 +#else +#error Cannot determine word size #endif +#define SIMD_BIT_BUF_SIZE (sizeof(simd_bit_buf_type) * 8) +typedef struct { + union { + bit_buf_type c; + simd_bit_buf_type simd; + } put_buffer; /* current bit accumulation buffer */ + int free_bits; /* # of bits available in it */ + /* (Neon GAS: # of bits now in it) */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ +} savable_state; typedef struct { struct jpeg_entropy_encoder pub; /* public fields */ @@ -123,6 +140,7 @@ typedef struct { size_t free_in_buffer; /* # of byte spaces remaining in buffer */ savable_state cur; /* Current bit buffer & DC state */ j_compress_ptr cinfo; /* dump_buffer needs access to this */ + int simd; } working_state; @@ -201,8 +219,17 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics) } /* Initialize bit buffer to empty */ - entropy->saved.put_buffer = 0; - entropy->saved.put_bits = 0; + if (entropy->simd) { + entropy->saved.put_buffer.simd = 0; +#if defined(__aarch64__) && !defined(NEON_INTRINSICS) + entropy->saved.free_bits = 0; +#else + entropy->saved.free_bits = SIMD_BIT_BUF_SIZE; +#endif + } else { + entropy->saved.put_buffer.c = 0; + entropy->saved.free_bits = BIT_BUF_SIZE; + } /* Initialize restart stuff */ entropy->restarts_to_go = cinfo->restart_interval; @@ -287,6 +314,7 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno, * this lets us detect duplicate VAL entries here, and later * allows emit_bits to detect any attempt to emit such symbols. */ + MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco)); MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi)); /* This is also a convenient place to check for out-of-range @@ -334,94 +362,94 @@ dump_buffer(working_state *state) /* Outputting bits to the file */ -/* These macros perform the same task as the emit_bits() function in the - * original libjpeg code. In addition to reducing overhead by explicitly - * inlining the code, additional performance is achieved by taking into - * account the size of the bit buffer and waiting until it is almost full - * before emptying it. This mostly benefits 64-bit platforms, since 6 - * bytes can be stored in a 64-bit bit buffer before it has to be emptied. +/* Output byte b and, speculatively, an additional 0 byte. 0xFF must be + * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the + * byte is 0xFF. Otherwise, the output buffer pointer is advanced by 1, and + * the speculative 0 byte will be overwritten by the next byte. */ - -#define EMIT_BYTE() { \ - JOCTET c; \ - put_bits -= 8; \ - c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \ - *buffer++ = c; \ - if (c == 0xFF) /* need to stuff a zero byte? */ \ - *buffer++ = 0; \ +#define EMIT_BYTE(b) { \ + buffer[0] = (JOCTET)(b); \ + buffer[1] = 0; \ + buffer -= -2 + ((JOCTET)(b) < 0xFF); \ } -#define PUT_BITS(code, size) { \ - put_bits += size; \ - put_buffer = (put_buffer << size) | code; \ -} - -#if SIZEOF_SIZE_T != 8 && !defined(_WIN64) - -#define CHECKBUF15() { \ - if (put_bits > 15) { \ - EMIT_BYTE() \ - EMIT_BYTE() \ +/* Output the entire bit buffer. If there are no 0xFF bytes in it, then write + * directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to + * encode 0xFF as 0xFF 0x00. + */ +#if BIT_BUF_SIZE == 64 + +#define FLUSH() { \ + if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \ + EMIT_BYTE(put_buffer >> 56) \ + EMIT_BYTE(put_buffer >> 48) \ + EMIT_BYTE(put_buffer >> 40) \ + EMIT_BYTE(put_buffer >> 32) \ + EMIT_BYTE(put_buffer >> 24) \ + EMIT_BYTE(put_buffer >> 16) \ + EMIT_BYTE(put_buffer >> 8) \ + EMIT_BYTE(put_buffer ) \ + } else { \ + buffer[0] = (JOCTET)(put_buffer >> 56); \ + buffer[1] = (JOCTET)(put_buffer >> 48); \ + buffer[2] = (JOCTET)(put_buffer >> 40); \ + buffer[3] = (JOCTET)(put_buffer >> 32); \ + buffer[4] = (JOCTET)(put_buffer >> 24); \ + buffer[5] = (JOCTET)(put_buffer >> 16); \ + buffer[6] = (JOCTET)(put_buffer >> 8); \ + buffer[7] = (JOCTET)(put_buffer); \ + buffer += 8; \ } \ } -#endif - -#define CHECKBUF31() { \ - if (put_bits > 31) { \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - } \ -} +#else -#define CHECKBUF47() { \ - if (put_bits > 47) { \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ +#define FLUSH() { \ + if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \ + EMIT_BYTE(put_buffer >> 24) \ + EMIT_BYTE(put_buffer >> 16) \ + EMIT_BYTE(put_buffer >> 8) \ + EMIT_BYTE(put_buffer ) \ + } else { \ + buffer[0] = (JOCTET)(put_buffer >> 24); \ + buffer[1] = (JOCTET)(put_buffer >> 16); \ + buffer[2] = (JOCTET)(put_buffer >> 8); \ + buffer[3] = (JOCTET)(put_buffer); \ + buffer += 4; \ } \ } -#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T) -#error Cannot determine word size #endif -#if SIZEOF_SIZE_T == 8 || defined(_WIN64) - -#define EMIT_BITS(code, size) { \ - CHECKBUF47() \ - PUT_BITS(code, size) \ -} - -#define EMIT_CODE(code, size) { \ - temp2 &= (((JLONG)1) << nbits) - 1; \ - CHECKBUF31() \ - PUT_BITS(code, size) \ - PUT_BITS(temp2, nbits) \ +/* Fill the bit buffer to capacity with the leading bits from code, then output + * the bit buffer and put the remaining bits from code into the bit buffer. + */ +#define PUT_AND_FLUSH(code, size) { \ + put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \ + FLUSH() \ + free_bits += BIT_BUF_SIZE; \ + put_buffer = code; \ } -#else - -#define EMIT_BITS(code, size) { \ - PUT_BITS(code, size) \ - CHECKBUF15() \ +/* Insert code into the bit buffer and output the bit buffer if needed. + * NOTE: We can't flush with free_bits == 0, since the left shift in + * PUT_AND_FLUSH() would have undefined behavior. + */ +#define PUT_BITS(code, size) { \ + free_bits -= size; \ + if (free_bits < 0) \ + PUT_AND_FLUSH(code, size) \ + else \ + put_buffer = (put_buffer << size) | code; \ } -#define EMIT_CODE(code, size) { \ - temp2 &= (((JLONG)1) << nbits) - 1; \ - PUT_BITS(code, size) \ - CHECKBUF15() \ - PUT_BITS(temp2, nbits) \ - CHECKBUF15() \ +#define PUT_CODE(code, size) { \ + temp &= (((JLONG)1) << nbits) - 1; \ + temp |= code << nbits; \ + nbits += size; \ + PUT_BITS(temp, nbits) \ } -#endif - /* Although it is exceedingly rare, it is possible for a Huffman-encoded * coefficient block to be larger than the 128-byte unencoded block. For each @@ -444,6 +472,7 @@ dump_buffer(working_state *state) #define STORE_BUFFER() { \ if (localbuf) { \ + size_t bytes, bytestocopy; \ bytes = buffer - _buffer; \ buffer = _buffer; \ while (bytes > 0) { \ @@ -466,20 +495,46 @@ dump_buffer(working_state *state) LOCAL(boolean) flush_bits(working_state *state) { - JOCTET _buffer[BUFSIZE], *buffer; - size_t put_buffer; int put_bits; - size_t bytes, bytestocopy; int localbuf = 0; + JOCTET _buffer[BUFSIZE], *buffer, temp; + simd_bit_buf_type put_buffer; int put_bits; + int localbuf = 0; + + if (state->simd) { +#if defined(__aarch64__) && !defined(NEON_INTRINSICS) + put_bits = state->cur.free_bits; +#else + put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits; +#endif + put_buffer = state->cur.put_buffer.simd; + } else { + put_bits = BIT_BUF_SIZE - state->cur.free_bits; + put_buffer = state->cur.put_buffer.c; + } - put_buffer = state->cur.put_buffer; - put_bits = state->cur.put_bits; LOAD_BUFFER() - /* fill any partial byte with ones */ - PUT_BITS(0x7F, 7) - while (put_bits >= 8) EMIT_BYTE() + while (put_bits >= 8) { + put_bits -= 8; + temp = (JOCTET)(put_buffer >> put_bits); + EMIT_BYTE(temp) + } + if (put_bits) { + /* fill partial byte with ones */ + temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits)); + EMIT_BYTE(temp) + } - state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ - state->cur.put_bits = 0; + if (state->simd) { /* and reset bit buffer to empty */ + state->cur.put_buffer.simd = 0; +#if defined(__aarch64__) && !defined(NEON_INTRINSICS) + state->cur.free_bits = 0; +#else + state->cur.free_bits = SIMD_BIT_BUF_SIZE; +#endif + } else { + state->cur.put_buffer.c = 0; + state->cur.free_bits = BIT_BUF_SIZE; + } STORE_BUFFER() return TRUE; @@ -493,7 +548,7 @@ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val, c_derived_tbl *dctbl, c_derived_tbl *actbl) { JOCTET _buffer[BUFSIZE], *buffer; - size_t bytes, bytestocopy; int localbuf = 0; + int localbuf = 0; LOAD_BUFFER() @@ -509,53 +564,41 @@ LOCAL(boolean) encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val, c_derived_tbl *dctbl, c_derived_tbl *actbl) { - int temp, temp2, temp3; - int nbits; - int r, code, size; + int temp, nbits, free_bits; + bit_buf_type put_buffer; JOCTET _buffer[BUFSIZE], *buffer; - size_t put_buffer; int put_bits; - int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0]; - size_t bytes, bytestocopy; int localbuf = 0; + int localbuf = 0; - put_buffer = state->cur.put_buffer; - put_bits = state->cur.put_bits; + free_bits = state->cur.free_bits; + put_buffer = state->cur.put_buffer.c; LOAD_BUFFER() /* Encode the DC coefficient difference per section F.1.2.1 */ - temp = temp2 = block[0] - last_dc_val; + temp = block[0] - last_dc_val; /* This is a well-known technique for obtaining the absolute value without a * branch. It is derived from an assembly language technique presented in * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by - * Agner Fog. + * Agner Fog. This code assumes we are on a two's complement machine. */ - temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); - temp ^= temp3; - temp -= temp3; - - /* For a negative input, want temp2 = bitwise complement of abs(input) */ - /* This code assumes we are on a two's complement machine */ - temp2 += temp3; + nbits = temp >> (CHAR_BIT * sizeof(int) - 1); + temp += nbits; + nbits ^= temp; /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = JPEG_NBITS(temp); - - /* Emit the Huffman-coded symbol for the number of bits */ - code = dctbl->ehufco[nbits]; - size = dctbl->ehufsi[nbits]; - EMIT_BITS(code, size) + nbits = JPEG_NBITS(nbits); - /* Mask off any extra bits in code */ - temp2 &= (((JLONG)1) << nbits) - 1; - - /* Emit that number of bits of the value, if positive, */ - /* or the complement of its magnitude, if negative. */ - EMIT_BITS(temp2, nbits) + /* Emit the Huffman-coded symbol for the number of bits. + * Emit that number of bits of the value, if positive, + * or the complement of its magnitude, if negative. + */ + PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits]) /* Encode the AC coefficients per section F.1.2.2 */ - r = 0; /* r = run length of zeros */ + { + int r = 0; /* r = run length of zeros */ /* Manually unroll the k loop to eliminate the counter variable. This * improves performance greatly on systems with a limited number of @@ -563,51 +606,46 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val, */ #define kloop(jpeg_natural_order_of_k) { \ if ((temp = block[jpeg_natural_order_of_k]) == 0) { \ - r++; \ + r += 16; \ } else { \ - temp2 = temp; \ /* Branch-less absolute value, bitwise complement, etc., same as above */ \ - temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \ - temp ^= temp3; \ - temp -= temp3; \ - temp2 += temp3; \ - nbits = JPEG_NBITS_NONZERO(temp); \ + nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \ + temp += nbits; \ + nbits ^= temp; \ + nbits = JPEG_NBITS_NONZERO(nbits); \ /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ - while (r > 15) { \ - EMIT_BITS(code_0xf0, size_0xf0) \ - r -= 16; \ + while (r >= 16 * 16) { \ + r -= 16 * 16; \ + PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \ } \ /* Emit Huffman symbol for run length / number of bits */ \ - temp3 = (r << 4) + nbits; \ - code = actbl->ehufco[temp3]; \ - size = actbl->ehufsi[temp3]; \ - EMIT_CODE(code, size) \ + r += nbits; \ + PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \ r = 0; \ } \ } - /* One iteration for each value in jpeg_natural_order[] */ - kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3); - kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18); - kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26); - kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27); - kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21); - kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57); - kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15); - kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58); - kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39); - kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47); - kloop(55); kloop(62); kloop(63); - - /* If the last coef(s) were zero, emit an end-of-block code */ - if (r > 0) { - code = actbl->ehufco[0]; - size = actbl->ehufsi[0]; - EMIT_BITS(code, size) + /* One iteration for each value in jpeg_natural_order[] */ + kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3); + kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18); + kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26); + kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27); + kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21); + kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57); + kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15); + kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58); + kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39); + kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47); + kloop(55); kloop(62); kloop(63); + + /* If the last coef(s) were zero, emit an end-of-block code */ + if (r > 0) { + PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0]) + } } - state->cur.put_buffer = put_buffer; - state->cur.put_bits = put_bits; + state->cur.put_buffer.c = put_buffer; + state->cur.free_bits = free_bits; STORE_BUFFER() return TRUE; @@ -654,8 +692,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ state.next_output_byte = cinfo->dest->next_output_byte; state.free_in_buffer = cinfo->dest->free_in_buffer; - ASSIGN_STATE(state.cur, entropy->saved); + state.cur = entropy->saved; state.cinfo = cinfo; + state.simd = entropy->simd; /* Emit restart marker if needed */ if (cinfo->restart_interval) { @@ -694,7 +733,7 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Completed MCU, so update state */ cinfo->dest->next_output_byte = state.next_output_byte; cinfo->dest->free_in_buffer = state.free_in_buffer; - ASSIGN_STATE(entropy->saved, state.cur); + entropy->saved = state.cur; /* Update restart-interval state too */ if (cinfo->restart_interval) { @@ -723,8 +762,9 @@ finish_pass_huff(j_compress_ptr cinfo) /* Load up working state ... flush_bits needs it */ state.next_output_byte = cinfo->dest->next_output_byte; state.free_in_buffer = cinfo->dest->free_in_buffer; - ASSIGN_STATE(state.cur, entropy->saved); + state.cur = entropy->saved; state.cinfo = cinfo; + state.simd = entropy->simd; /* Flush out the last data */ if (!flush_bits(&state)) @@ -733,7 +773,7 @@ finish_pass_huff(j_compress_ptr cinfo) /* Update state */ cinfo->dest->next_output_byte = state.next_output_byte; cinfo->dest->free_in_buffer = state.free_in_buffer; - ASSIGN_STATE(entropy->saved, state.cur); + entropy->saved = state.cur; } diff --git a/3rdparty/libjpeg-turbo/src/jcphuff.c b/3rdparty/libjpeg-turbo/src/jcphuff.c index a8b94bed84..bd14fc27d5 100644 --- a/3rdparty/libjpeg-turbo/src/jcphuff.c +++ b/3rdparty/libjpeg-turbo/src/jcphuff.c @@ -4,8 +4,9 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2011, 2015, 2018, D. R. Commander. + * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander. * Copyright (C) 2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -51,15 +52,19 @@ * flags (this defines __thumb__). */ -/* NOTE: Both GCC and Clang define __GNUC__ */ -#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__)) +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \ + defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif #endif #ifdef USE_CLZ_INTRINSIC +#if defined(_MSC_VER) && !defined(__clang__) +#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x)) +#else #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#endif #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) #else #include "jpeg_nbits_table.h" @@ -169,24 +174,26 @@ INLINE METHODDEF(int) count_zeroes(size_t *x) { - int result; #if defined(HAVE_BUILTIN_CTZL) + int result; result = __builtin_ctzl(*x); *x >>= result; #elif defined(HAVE_BITSCANFORWARD64) + unsigned long result; _BitScanForward64(&result, *x); *x >>= result; #elif defined(HAVE_BITSCANFORWARD) + unsigned long result; _BitScanForward(&result, *x); *x >>= result; #else - result = 0; + int result = 0; while ((*x & 1) == 0) { ++result; *x >>= 1; } #endif - return result; + return (int)result; } @@ -860,7 +867,7 @@ encode_mcu_AC_refine_prepare(const JCOEF *block, #define ENCODE_COEFS_AC_REFINE(label) { \ while (zerobits) { \ - int idx = count_zeroes(&zerobits); \ + idx = count_zeroes(&zerobits); \ r += idx; \ cabsvalue += idx; \ signbits >>= idx; \ @@ -917,7 +924,7 @@ METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; - register int temp, r; + register int temp, r, idx; char *BR_buffer; unsigned int BR; int Sl = cinfo->Se - cinfo->Ss + 1; @@ -968,7 +975,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) if (zerobits) { int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue); - int idx = count_zeroes(&zerobits); + idx = count_zeroes(&zerobits); signbits >>= idx; idx += diff; r += idx; diff --git a/3rdparty/libjpeg-turbo/src/jcsample.c b/3rdparty/libjpeg-turbo/src/jcsample.c index bd27b84e06..e8515ebf0f 100644 --- a/3rdparty/libjpeg-turbo/src/jcsample.c +++ b/3rdparty/libjpeg-turbo/src/jcsample.c @@ -6,7 +6,7 @@ * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2014, MIPS Technologies, Inc., California. - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015, 2019, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -103,7 +103,7 @@ expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols, if (numcols > 0) { for (row = 0; row < num_rows; row++) { ptr = image_data[row] + input_cols; - pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ + pixval = ptr[-1]; for (count = numcols; count > 0; count--) *ptr++ = pixval; } @@ -174,7 +174,7 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, for (v = 0; v < v_expand; v++) { inptr = input_data[inrow + v] + outcol_h; for (h = 0; h < h_expand; h++) { - outvalue += (JLONG)GETJSAMPLE(*inptr++); + outvalue += (JLONG)(*inptr++); } } *outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix); @@ -237,8 +237,7 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, inptr = input_data[outrow]; bias = 0; /* bias = 0,1,0,1,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { - *outptr++ = - (JSAMPLE)((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) + bias) >> 1); + *outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1); bias ^= 1; /* 0=>1, 1=>0 */ inptr += 2; } @@ -277,8 +276,7 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, bias = 1; /* bias = 1,2,1,2,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { *outptr++ = - (JSAMPLE)((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + bias) >> 2); + (JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2); bias ^= 3; /* 1=>2, 2=>1 */ inptr0 += 2; inptr1 += 2; } @@ -337,33 +335,25 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, below_ptr = input_data[inrow + 2]; /* Special case for first column: pretend column -1 is same as column 0 */ - membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); - neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]); + membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1]; + neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] + + inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2]; neighsum += neighsum; - neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); + neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2]; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; for (colctr = output_cols - 2; colctr > 0; colctr--) { /* sum of pixels directly mapped to this output element */ - membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1]; /* sum of edge-neighbor pixels */ - neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]); + neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] + + inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2]; /* The edge-neighbors count twice as much as corner-neighbors */ neighsum += neighsum; /* Add in the corner-neighbors */ - neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]); + neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2]; /* form final output scaled up by 2^16 */ membersum = membersum * memberscale + neighsum * neighscale; /* round, descale and output it */ @@ -372,15 +362,11 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, } /* Special case for last column */ - membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); - neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]); + membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1]; + neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] + + inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1]; neighsum += neighsum; - neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); + neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1]; membersum = membersum * memberscale + neighsum * neighscale; *outptr = (JSAMPLE)((membersum + 32768) >> 16); @@ -429,21 +415,18 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, below_ptr = input_data[outrow + 1]; /* Special case for first column */ - colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) + - GETJSAMPLE(*inptr); - membersum = GETJSAMPLE(*inptr++); - nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + colsum = (*above_ptr++) + (*below_ptr++) + inptr[0]; + membersum = *inptr++; + nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0]; neighsum = colsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); lastcolsum = colsum; colsum = nextcolsum; for (colctr = output_cols - 2; colctr > 0; colctr--) { - membersum = GETJSAMPLE(*inptr++); + membersum = *inptr++; above_ptr++; below_ptr++; - nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0]; neighsum = lastcolsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); @@ -451,7 +434,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, } /* Special case for last column */ - membersum = GETJSAMPLE(*inptr); + membersum = *inptr; neighsum = lastcolsum + (colsum - membersum) + colsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr = (JSAMPLE)((membersum + 32768) >> 16); diff --git a/3rdparty/libjpeg-turbo/src/jdapistd.c b/3rdparty/libjpeg-turbo/src/jdapistd.c index 38bd1110d9..695a620099 100644 --- a/3rdparty/libjpeg-turbo/src/jdapistd.c +++ b/3rdparty/libjpeg-turbo/src/jdapistd.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander. + * Copyright (C) 2010, 2015-2020, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -319,6 +319,8 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { JDIMENSION n; my_master_ptr master = (my_master_ptr)cinfo->master; + JSAMPLE dummy_sample[1] = { 0 }; + JSAMPROW dummy_row = dummy_sample; JSAMPARRAY scanlines = NULL; void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, @@ -329,6 +331,10 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) if (cinfo->cconvert && cinfo->cconvert->color_convert) { color_convert = cinfo->cconvert->color_convert; cinfo->cconvert->color_convert = noop_convert; + /* This just prevents UBSan from complaining about adding 0 to a NULL + * pointer. The pointer isn't actually used. + */ + scanlines = &dummy_row; } if (cinfo->cquantize && cinfo->cquantize->color_quantize) { @@ -532,6 +538,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) * decoded coefficients. This is ~5% faster for large subsets, but * it's tough to tell a difference for smaller images. */ + if (!cinfo->entropy->insufficient_data) + cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row; (*cinfo->entropy->decode_mcu) (cinfo, NULL); } } diff --git a/3rdparty/libjpeg-turbo/src/jdarith.c b/3rdparty/libjpeg-turbo/src/jdarith.c index 6002481e24..7f0d3a785c 100644 --- a/3rdparty/libjpeg-turbo/src/jdarith.c +++ b/3rdparty/libjpeg-turbo/src/jdarith.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Developed 1997-2015 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2018, D. R. Commander. + * Copyright (C) 2015-2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -80,7 +80,7 @@ get_byte(j_decompress_ptr cinfo) if (!(*src->fill_input_buffer) (cinfo)) ERREXIT(cinfo, JERR_CANT_SUSPEND); src->bytes_in_buffer--; - return GETJOCTET(*src->next_input_byte++); + return *src->next_input_byte++; } @@ -665,8 +665,16 @@ bad: for (ci = 0; ci < cinfo->comps_in_scan; ci++) { int coefi, cindex = cinfo->cur_comp_info[ci]->component_index; int *coef_bit_ptr = &cinfo->coef_bits[cindex][0]; + int *prev_coef_bit_ptr = + &cinfo->coef_bits[cindex + cinfo->num_components][0]; if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) { + if (cinfo->input_scan_number > 1) + prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi]; + else + prev_coef_bit_ptr[coefi] = 0; + } for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; if (cinfo->Ah != expected) @@ -727,6 +735,7 @@ bad: entropy->c = 0; entropy->a = 0; entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + entropy->pub.insufficient_data = FALSE; /* Initialize restart counter */ entropy->restarts_to_go = cinfo->restart_interval; @@ -763,7 +772,7 @@ jinit_arith_decoder(j_decompress_ptr cinfo) int *coef_bit_ptr, ci; cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, - cinfo->num_components * DCTSIZE2 * + cinfo->num_components * 2 * DCTSIZE2 * sizeof(int)); coef_bit_ptr = &cinfo->coef_bits[0][0]; for (ci = 0; ci < cinfo->num_components; ci++) diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.c b/3rdparty/libjpeg-turbo/src/jdcoefct.c index 2ba6aa11e4..15e6cded62 100644 --- a/3rdparty/libjpeg-turbo/src/jdcoefct.c +++ b/3rdparty/libjpeg-turbo/src/jdcoefct.c @@ -5,7 +5,7 @@ * Copyright (C) 1994-1997, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2010, 2015-2016, D. R. Commander. + * Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander. * Copyright (C) 2015, 2020, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -102,6 +102,8 @@ decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) /* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */ jzero_far((void *)coef->MCU_buffer[0], (size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK))); + if (!cinfo->entropy->insufficient_data) + cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row; if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ coef->MCU_vert_offset = yoffset; @@ -227,6 +229,8 @@ consume_data(j_decompress_ptr cinfo) } } } + if (!cinfo->entropy->insufficient_data) + cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row; /* Try to fetch the MCU. */ if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ @@ -326,19 +330,22 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) #ifdef BLOCK_SMOOTHING_SUPPORTED /* - * This code applies interblock smoothing as described by section K.8 - * of the JPEG standard: the first 5 AC coefficients are estimated from - * the DC values of a DCT block and its 8 neighboring blocks. + * This code applies interblock smoothing; the first 9 AC coefficients are + * estimated from the DC values of a DCT block and its 24 neighboring blocks. * We apply smoothing only for progressive JPEG decoding, and only if * the coefficients it can estimate are not yet known to full precision. */ -/* Natural-order array positions of the first 5 zigzag-order coefficients */ +/* Natural-order array positions of the first 9 zigzag-order coefficients */ #define Q01_POS 1 #define Q10_POS 8 #define Q20_POS 16 #define Q11_POS 9 #define Q02_POS 2 +#define Q03_POS 3 +#define Q12_POS 10 +#define Q21_POS 17 +#define Q30_POS 24 /* * Determine whether block smoothing is applicable and safe. @@ -356,8 +363,8 @@ smoothing_ok(j_decompress_ptr cinfo) int ci, coefi; jpeg_component_info *compptr; JQUANT_TBL *qtable; - int *coef_bits; - int *coef_bits_latch; + int *coef_bits, *prev_coef_bits; + int *coef_bits_latch, *prev_coef_bits_latch; if (!cinfo->progressive_mode || cinfo->coef_bits == NULL) return FALSE; @@ -366,34 +373,47 @@ smoothing_ok(j_decompress_ptr cinfo) if (coef->coef_bits_latch == NULL) coef->coef_bits_latch = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, - cinfo->num_components * + cinfo->num_components * 2 * (SAVED_COEFS * sizeof(int))); coef_bits_latch = coef->coef_bits_latch; + prev_coef_bits_latch = + &coef->coef_bits_latch[cinfo->num_components * SAVED_COEFS]; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { /* All components' quantization values must already be latched. */ if ((qtable = compptr->quant_table) == NULL) return FALSE; - /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */ + /* Verify DC & first 9 AC quantizers are nonzero to avoid zero-divide. */ if (qtable->quantval[0] == 0 || qtable->quantval[Q01_POS] == 0 || qtable->quantval[Q10_POS] == 0 || qtable->quantval[Q20_POS] == 0 || qtable->quantval[Q11_POS] == 0 || - qtable->quantval[Q02_POS] == 0) + qtable->quantval[Q02_POS] == 0 || + qtable->quantval[Q03_POS] == 0 || + qtable->quantval[Q12_POS] == 0 || + qtable->quantval[Q21_POS] == 0 || + qtable->quantval[Q30_POS] == 0) return FALSE; /* DC values must be at least partly known for all components. */ coef_bits = cinfo->coef_bits[ci]; + prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components]; if (coef_bits[0] < 0) return FALSE; + coef_bits_latch[0] = coef_bits[0]; /* Block smoothing is helpful if some AC coefficients remain inaccurate. */ - for (coefi = 1; coefi <= 5; coefi++) { + for (coefi = 1; coefi < SAVED_COEFS; coefi++) { + if (cinfo->input_scan_number > 1) + prev_coef_bits_latch[coefi] = prev_coef_bits[coefi]; + else + prev_coef_bits_latch[coefi] = -1; coef_bits_latch[coefi] = coef_bits[coefi]; if (coef_bits[coefi] != 0) smoothing_useful = TRUE; } coef_bits_latch += SAVED_COEFS; + prev_coef_bits_latch += SAVED_COEFS; } return smoothing_useful; @@ -412,17 +432,20 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) JDIMENSION block_num, last_block_column; int ci, block_row, block_rows, access_rows; JBLOCKARRAY buffer; - JBLOCKROW buffer_ptr, prev_block_row, next_block_row; + JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row; + JBLOCKROW next_block_row, next_next_block_row; JSAMPARRAY output_ptr; JDIMENSION output_col; jpeg_component_info *compptr; inverse_DCT_method_ptr inverse_DCT; - boolean first_row, last_row; + boolean change_dc; JCOEF *workspace; int *coef_bits; JQUANT_TBL *quanttbl; - JLONG Q00, Q01, Q02, Q10, Q11, Q20, num; - int DC1, DC2, DC3, DC4, DC5, DC6, DC7, DC8, DC9; + JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num; + int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12, + DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24, + DC25; int Al, pred; /* Keep a local variable to avoid looking it up more than once */ @@ -434,10 +457,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) if (cinfo->input_scan_number == cinfo->output_scan_number) { /* If input is working on current scan, we ordinarily want it to * have completed the current row. But if input scan is DC, - * we want it to keep one row ahead so that next block row's DC + * we want it to keep two rows ahead so that next two block rows' DC * values are up to date. */ - JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0; + JDIMENSION delta = (cinfo->Ss == 0) ? 2 : 0; if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta) break; } @@ -452,34 +475,53 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) if (!compptr->component_needed) continue; /* Count non-dummy DCT block rows in this iMCU row. */ - if (cinfo->output_iMCU_row < last_iMCU_row) { + if (cinfo->output_iMCU_row < last_iMCU_row - 1) { + block_rows = compptr->v_samp_factor; + access_rows = block_rows * 3; /* this and next two iMCU rows */ + } else if (cinfo->output_iMCU_row < last_iMCU_row) { block_rows = compptr->v_samp_factor; access_rows = block_rows * 2; /* this and next iMCU row */ - last_row = FALSE; } else { /* NB: can't use last_row_height here; it is input-side-dependent! */ block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor); if (block_rows == 0) block_rows = compptr->v_samp_factor; access_rows = block_rows; /* this iMCU row only */ - last_row = TRUE; } /* Align the virtual buffer for this component. */ - if (cinfo->output_iMCU_row > 0) { - access_rows += compptr->v_samp_factor; /* prior iMCU row too */ + if (cinfo->output_iMCU_row > 1) { + access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */ + buffer = (*cinfo->mem->access_virt_barray) + ((j_common_ptr)cinfo, coef->whole_image[ci], + (cinfo->output_iMCU_row - 2) * compptr->v_samp_factor, + (JDIMENSION)access_rows, FALSE); + buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */ + } else if (cinfo->output_iMCU_row > 0) { buffer = (*cinfo->mem->access_virt_barray) ((j_common_ptr)cinfo, coef->whole_image[ci], (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor, (JDIMENSION)access_rows, FALSE); buffer += compptr->v_samp_factor; /* point to current iMCU row */ - first_row = FALSE; } else { buffer = (*cinfo->mem->access_virt_barray) ((j_common_ptr)cinfo, coef->whole_image[ci], (JDIMENSION)0, (JDIMENSION)access_rows, FALSE); - first_row = TRUE; } - /* Fetch component-dependent info */ - coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS); + /* Fetch component-dependent info. + * If the current scan is incomplete, then we use the component-dependent + * info from the previous scan. + */ + if (cinfo->output_iMCU_row > cinfo->master->last_good_iMCU_row) + coef_bits = + coef->coef_bits_latch + ((ci + cinfo->num_components) * SAVED_COEFS); + else + coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS); + + /* We only do DC interpolation if no AC coefficient data is available. */ + change_dc = + coef_bits[1] == -1 && coef_bits[2] == -1 && coef_bits[3] == -1 && + coef_bits[4] == -1 && coef_bits[5] == -1 && coef_bits[6] == -1 && + coef_bits[7] == -1 && coef_bits[8] == -1 && coef_bits[9] == -1; + quanttbl = compptr->quant_table; Q00 = quanttbl->quantval[0]; Q01 = quanttbl->quantval[Q01_POS]; @@ -487,27 +529,51 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) Q20 = quanttbl->quantval[Q20_POS]; Q11 = quanttbl->quantval[Q11_POS]; Q02 = quanttbl->quantval[Q02_POS]; + if (change_dc) { + Q03 = quanttbl->quantval[Q03_POS]; + Q12 = quanttbl->quantval[Q12_POS]; + Q21 = quanttbl->quantval[Q21_POS]; + Q30 = quanttbl->quantval[Q30_POS]; + } inverse_DCT = cinfo->idct->inverse_DCT[ci]; output_ptr = output_buf[ci]; /* Loop over all DCT blocks to be processed. */ for (block_row = 0; block_row < block_rows; block_row++) { buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci]; - if (first_row && block_row == 0) + + if (block_row > 0 || cinfo->output_iMCU_row > 0) + prev_block_row = + buffer[block_row - 1] + cinfo->master->first_MCU_col[ci]; + else prev_block_row = buffer_ptr; + + if (block_row > 1 || cinfo->output_iMCU_row > 1) + prev_prev_block_row = + buffer[block_row - 2] + cinfo->master->first_MCU_col[ci]; + else + prev_prev_block_row = prev_block_row; + + if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row) + next_block_row = + buffer[block_row + 1] + cinfo->master->first_MCU_col[ci]; else - prev_block_row = buffer[block_row - 1] + - cinfo->master->first_MCU_col[ci]; - if (last_row && block_row == block_rows - 1) next_block_row = buffer_ptr; + + if (block_row < block_rows - 2 || + cinfo->output_iMCU_row < last_iMCU_row - 1) + next_next_block_row = + buffer[block_row + 2] + cinfo->master->first_MCU_col[ci]; else - next_block_row = buffer[block_row + 1] + - cinfo->master->first_MCU_col[ci]; + next_next_block_row = next_block_row; + /* We fetch the surrounding DC values using a sliding-register approach. - * Initialize all nine here so as to do the right thing on narrow pics. + * Initialize all 25 here so as to do the right thing on narrow pics. */ - DC1 = DC2 = DC3 = (int)prev_block_row[0][0]; - DC4 = DC5 = DC6 = (int)buffer_ptr[0][0]; - DC7 = DC8 = DC9 = (int)next_block_row[0][0]; + DC01 = DC02 = DC03 = DC04 = DC05 = (int)prev_prev_block_row[0][0]; + DC06 = DC07 = DC08 = DC09 = DC10 = (int)prev_block_row[0][0]; + DC11 = DC12 = DC13 = DC14 = DC15 = (int)buffer_ptr[0][0]; + DC16 = DC17 = DC18 = DC19 = DC20 = (int)next_block_row[0][0]; + DC21 = DC22 = DC23 = DC24 = DC25 = (int)next_next_block_row[0][0]; output_col = 0; last_block_column = compptr->width_in_blocks - 1; for (block_num = cinfo->master->first_MCU_col[ci]; @@ -515,18 +581,39 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) /* Fetch current DCT block into workspace so we can modify it. */ jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1); /* Update DC values */ - if (block_num < last_block_column) { - DC3 = (int)prev_block_row[1][0]; - DC6 = (int)buffer_ptr[1][0]; - DC9 = (int)next_block_row[1][0]; + if (block_num == cinfo->master->first_MCU_col[ci] && + block_num < last_block_column) { + DC04 = (int)prev_prev_block_row[1][0]; + DC09 = (int)prev_block_row[1][0]; + DC14 = (int)buffer_ptr[1][0]; + DC19 = (int)next_block_row[1][0]; + DC24 = (int)next_next_block_row[1][0]; } - /* Compute coefficient estimates per K.8. - * An estimate is applied only if coefficient is still zero, - * and is not known to be fully accurate. + if (block_num + 1 < last_block_column) { + DC05 = (int)prev_prev_block_row[2][0]; + DC10 = (int)prev_block_row[2][0]; + DC15 = (int)buffer_ptr[2][0]; + DC20 = (int)next_block_row[2][0]; + DC25 = (int)next_next_block_row[2][0]; + } + /* If DC interpolation is enabled, compute coefficient estimates using + * a Gaussian-like kernel, keeping the averages of the DC values. + * + * If DC interpolation is disabled, compute coefficient estimates using + * an algorithm similar to the one described in Section K.8 of the JPEG + * standard, except applied to a 5x5 window rather than a 3x3 window. + * + * An estimate is applied only if the coefficient is still zero and is + * not known to be fully accurate. */ /* AC01 */ if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) { - num = 36 * Q00 * (DC4 - DC6); + num = Q00 * (change_dc ? + (-DC01 - DC02 + DC04 + DC05 - 3 * DC06 + 13 * DC07 - + 13 * DC09 + 3 * DC10 - 3 * DC11 + 38 * DC12 - 38 * DC14 + + 3 * DC15 - 3 * DC16 + 13 * DC17 - 13 * DC19 + 3 * DC20 - + DC21 - DC22 + DC24 + DC25) : + (-7 * DC11 + 50 * DC12 - 50 * DC14 + 7 * DC15)); if (num >= 0) { pred = (int)(((Q01 << 7) + num) / (Q01 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -541,7 +628,12 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC10 */ if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) { - num = 36 * Q00 * (DC2 - DC8); + num = Q00 * (change_dc ? + (-DC01 - 3 * DC02 - 3 * DC03 - 3 * DC04 - DC05 - DC06 + + 13 * DC07 + 38 * DC08 + 13 * DC09 - DC10 + DC16 - + 13 * DC17 - 38 * DC18 - 13 * DC19 + DC20 + DC21 + + 3 * DC22 + 3 * DC23 + 3 * DC24 + DC25) : + (-7 * DC03 + 50 * DC08 - 50 * DC18 + 7 * DC23)); if (num >= 0) { pred = (int)(((Q10 << 7) + num) / (Q10 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -556,7 +648,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC20 */ if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) { - num = 9 * Q00 * (DC2 + DC8 - 2 * DC5); + num = Q00 * (change_dc ? + (DC03 + 2 * DC07 + 7 * DC08 + 2 * DC09 - 5 * DC12 - 14 * DC13 - + 5 * DC14 + 2 * DC17 + 7 * DC18 + 2 * DC19 + DC23) : + (-DC03 + 13 * DC08 - 24 * DC13 + 13 * DC18 - DC23)); if (num >= 0) { pred = (int)(((Q20 << 7) + num) / (Q20 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -571,7 +666,11 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC11 */ if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) { - num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9); + num = Q00 * (change_dc ? + (-DC01 + DC05 + 9 * DC07 - 9 * DC09 - 9 * DC17 + + 9 * DC19 + DC21 - DC25) : + (DC10 + DC16 - 10 * DC17 + 10 * DC19 - DC02 - DC20 + DC22 - + DC24 + DC04 - DC06 + 10 * DC07 - 10 * DC09)); if (num >= 0) { pred = (int)(((Q11 << 7) + num) / (Q11 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -586,7 +685,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC02 */ if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) { - num = 9 * Q00 * (DC4 + DC6 - 2 * DC5); + num = Q00 * (change_dc ? + (2 * DC07 - 5 * DC08 + 2 * DC09 + DC11 + 7 * DC12 - 14 * DC13 + + 7 * DC14 + DC15 + 2 * DC17 - 5 * DC18 + 2 * DC19) : + (-DC11 + 13 * DC12 - 24 * DC13 + 13 * DC14 - DC15)); if (num >= 0) { pred = (int)(((Q02 << 7) + num) / (Q02 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -599,14 +701,96 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } workspace[2] = (JCOEF)pred; } + if (change_dc) { + /* AC03 */ + if ((Al = coef_bits[6]) != 0 && workspace[3] == 0) { + num = Q00 * (DC07 - DC09 + 2 * DC12 - 2 * DC14 + DC17 - DC19); + if (num >= 0) { + pred = (int)(((Q03 << 7) + num) / (Q03 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = (int)(((Q03 << 7) - num) / (Q03 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[3] = (JCOEF)pred; + } + /* AC12 */ + if ((Al = coef_bits[7]) != 0 && workspace[10] == 0) { + num = Q00 * (DC07 - 3 * DC08 + DC09 - DC17 + 3 * DC18 - DC19); + if (num >= 0) { + pred = (int)(((Q12 << 7) + num) / (Q12 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = (int)(((Q12 << 7) - num) / (Q12 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[10] = (JCOEF)pred; + } + /* AC21 */ + if ((Al = coef_bits[8]) != 0 && workspace[17] == 0) { + num = Q00 * (DC07 - DC09 - 3 * DC12 + 3 * DC14 + DC17 - DC19); + if (num >= 0) { + pred = (int)(((Q21 << 7) + num) / (Q21 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = (int)(((Q21 << 7) - num) / (Q21 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[17] = (JCOEF)pred; + } + /* AC30 */ + if ((Al = coef_bits[9]) != 0 && workspace[24] == 0) { + num = Q00 * (DC07 + 2 * DC08 + DC09 - DC17 - 2 * DC18 - DC19); + if (num >= 0) { + pred = (int)(((Q30 << 7) + num) / (Q30 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = (int)(((Q30 << 7) - num) / (Q30 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[24] = (JCOEF)pred; + } + /* coef_bits[0] is non-negative. Otherwise this function would not + * be called. + */ + num = Q00 * + (-2 * DC01 - 6 * DC02 - 8 * DC03 - 6 * DC04 - 2 * DC05 - + 6 * DC06 + 6 * DC07 + 42 * DC08 + 6 * DC09 - 6 * DC10 - + 8 * DC11 + 42 * DC12 + 152 * DC13 + 42 * DC14 - 8 * DC15 - + 6 * DC16 + 6 * DC17 + 42 * DC18 + 6 * DC19 - 6 * DC20 - + 2 * DC21 - 6 * DC22 - 8 * DC23 - 6 * DC24 - 2 * DC25); + if (num >= 0) { + pred = (int)(((Q00 << 7) + num) / (Q00 << 8)); + } else { + pred = (int)(((Q00 << 7) - num) / (Q00 << 8)); + pred = -pred; + } + workspace[0] = (JCOEF)pred; + } /* change_dc */ + /* OK, do the IDCT */ (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr, output_col); /* Advance for next column */ - DC1 = DC2; DC2 = DC3; - DC4 = DC5; DC5 = DC6; - DC7 = DC8; DC8 = DC9; - buffer_ptr++, prev_block_row++, next_block_row++; + DC01 = DC02; DC02 = DC03; DC03 = DC04; DC04 = DC05; + DC06 = DC07; DC07 = DC08; DC08 = DC09; DC09 = DC10; + DC11 = DC12; DC12 = DC13; DC13 = DC14; DC14 = DC15; + DC16 = DC17; DC17 = DC18; DC18 = DC19; DC19 = DC20; + DC21 = DC22; DC22 = DC23; DC23 = DC24; DC24 = DC25; + buffer_ptr++, prev_block_row++, next_block_row++, + prev_prev_block_row++, next_next_block_row++; output_col += compptr->_DCT_scaled_size; } output_ptr += compptr->_DCT_scaled_size; @@ -655,7 +839,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer) #ifdef BLOCK_SMOOTHING_SUPPORTED /* If block smoothing could be used, need a bigger window */ if (cinfo->progressive_mode) - access_rows *= 3; + access_rows *= 5; #endif coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) ((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE, diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.h b/3rdparty/libjpeg-turbo/src/jdcoefct.h index c4d1943dd4..9a0e780663 100644 --- a/3rdparty/libjpeg-turbo/src/jdcoefct.h +++ b/3rdparty/libjpeg-turbo/src/jdcoefct.h @@ -5,6 +5,7 @@ * Copyright (C) 1994-1997, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2020, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. */ @@ -51,7 +52,7 @@ typedef struct { #ifdef BLOCK_SMOOTHING_SUPPORTED /* When doing block smoothing, we latch coefficient Al values here */ int *coef_bits_latch; -#define SAVED_COEFS 6 /* we save coef_bits[0..5] */ +#define SAVED_COEFS 10 /* we save coef_bits[0..9] */ #endif } my_coef_controller; diff --git a/3rdparty/libjpeg-turbo/src/jdcol565.c b/3rdparty/libjpeg-turbo/src/jdcol565.c index 40068ef84f..53c7bd9187 100644 --- a/3rdparty/libjpeg-turbo/src/jdcol565.c +++ b/3rdparty/libjpeg-turbo/src/jdcol565.c @@ -45,9 +45,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; @@ -58,18 +58,18 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; b = range_limit[y + Cbbtab[cb]]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; @@ -80,9 +80,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr += 4; } if (num_cols & 1) { - y = GETJSAMPLE(*inptr0); - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + y = *inptr0; + cb = *inptr1; + cr = *inptr2; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; @@ -125,9 +125,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -139,9 +139,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -150,9 +150,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -165,9 +165,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr += 4; } if (num_cols & 1) { - y = GETJSAMPLE(*inptr0); - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + y = *inptr0; + cb = *inptr1; + cr = *inptr2; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -202,32 +202,32 @@ rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - r = GETJSAMPLE(*inptr0++); - g = GETJSAMPLE(*inptr1++); - b = GETJSAMPLE(*inptr2++); + r = *inptr0++; + g = *inptr1++; + b = *inptr2++; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - r = GETJSAMPLE(*inptr0++); - g = GETJSAMPLE(*inptr1++); - b = GETJSAMPLE(*inptr2++); + r = *inptr0++; + g = *inptr1++; + b = *inptr2++; rgb = PACK_SHORT_565(r, g, b); - r = GETJSAMPLE(*inptr0++); - g = GETJSAMPLE(*inptr1++); - b = GETJSAMPLE(*inptr2++); + r = *inptr0++; + g = *inptr1++; + b = *inptr2++; rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); outptr += 4; } if (num_cols & 1) { - r = GETJSAMPLE(*inptr0); - g = GETJSAMPLE(*inptr1); - b = GETJSAMPLE(*inptr2); + r = *inptr0; + g = *inptr1; + b = *inptr2; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; } @@ -259,24 +259,24 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + r = range_limit[DITHER_565_R(*inptr0++, d0)]; + g = range_limit[DITHER_565_G(*inptr1++, d0)]; + b = range_limit[DITHER_565_B(*inptr2++, d0)]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + r = range_limit[DITHER_565_R(*inptr0++, d0)]; + g = range_limit[DITHER_565_G(*inptr1++, d0)]; + b = range_limit[DITHER_565_B(*inptr2++, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + r = range_limit[DITHER_565_R(*inptr0++, d0)]; + g = range_limit[DITHER_565_G(*inptr1++, d0)]; + b = range_limit[DITHER_565_B(*inptr2++, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); @@ -284,9 +284,9 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr += 4; } if (num_cols & 1) { - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)]; + r = range_limit[DITHER_565_R(*inptr0, d0)]; + g = range_limit[DITHER_565_G(*inptr1, d0)]; + b = range_limit[DITHER_565_B(*inptr2, d0)]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; } diff --git a/3rdparty/libjpeg-turbo/src/jdcolext.c b/3rdparty/libjpeg-turbo/src/jdcolext.c index 72a5301070..863c7a2fbc 100644 --- a/3rdparty/libjpeg-turbo/src/jdcolext.c +++ b/3rdparty/libjpeg-turbo/src/jdcolext.c @@ -53,9 +53,9 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - y = GETJSAMPLE(inptr0[col]); - cb = GETJSAMPLE(inptr1[col]); - cr = GETJSAMPLE(inptr2[col]); + y = inptr0[col]; + cb = inptr1[col]; + cr = inptr2[col]; /* Range-limiting is essential due to noise introduced by DCT losses. */ outptr[RGB_RED] = range_limit[y + Crrtab[cr]]; outptr[RGB_GREEN] = range_limit[y + @@ -93,7 +93,6 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, inptr = input_buf[0][input_row++]; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - /* We can dispense with GETJSAMPLE() here */ outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col]; /* Set unused byte to 0xFF so it can be interpreted as an opaque */ /* alpha channel value */ @@ -128,7 +127,6 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - /* We can dispense with GETJSAMPLE() here */ outptr[RGB_RED] = inptr0[col]; outptr[RGB_GREEN] = inptr1[col]; outptr[RGB_BLUE] = inptr2[col]; diff --git a/3rdparty/libjpeg-turbo/src/jdcolor.c b/3rdparty/libjpeg-turbo/src/jdcolor.c index d3ae40c7da..8da2b4eaf2 100644 --- a/3rdparty/libjpeg-turbo/src/jdcolor.c +++ b/3rdparty/libjpeg-turbo/src/jdcolor.c @@ -341,9 +341,9 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr0[col]); - g = GETJSAMPLE(inptr1[col]); - b = GETJSAMPLE(inptr2[col]); + r = inptr0[col]; + g = inptr1[col]; + b = inptr2[col]; /* Y */ outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + ctab[b + B_Y_OFF]) >> SCALEBITS); @@ -550,9 +550,9 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - y = GETJSAMPLE(inptr0[col]); - cb = GETJSAMPLE(inptr1[col]); - cr = GETJSAMPLE(inptr2[col]); + y = inptr0[col]; + cb = inptr1[col]; + cr = inptr2[col]; /* Range-limiting is essential due to noise introduced by DCT losses. */ outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */ outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */ @@ -560,7 +560,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, SCALEBITS)))]; outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */ /* K passes through unchanged */ - outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */ + outptr[3] = inptr3[col]; outptr += 4; } } diff --git a/3rdparty/libjpeg-turbo/src/jdhuff.c b/3rdparty/libjpeg-turbo/src/jdhuff.c index a1128178b0..f786c10547 100644 --- a/3rdparty/libjpeg-turbo/src/jdhuff.c +++ b/3rdparty/libjpeg-turbo/src/jdhuff.c @@ -5,6 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander. + * Copyright (C) 2018, Matthias Räncker. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -39,24 +40,6 @@ typedef struct { int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. - */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest, src) ((dest) = (src)) -#else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest, src) \ - ((dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) -#endif -#endif - - typedef struct { struct jpeg_entropy_decoder pub; /* public fields */ @@ -325,7 +308,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state, bytes_in_buffer = cinfo->src->bytes_in_buffer; } bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); + c = *next_input_byte++; /* If it's 0xFF, check and discard stuffed zero byte */ if (c == 0xFF) { @@ -342,7 +325,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state, bytes_in_buffer = cinfo->src->bytes_in_buffer; } bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); + c = *next_input_byte++; } while (c == 0xFF); if (c == 0) { @@ -405,8 +388,8 @@ no_more_bytes: #define GET_BYTE { \ register int c0, c1; \ - c0 = GETJOCTET(*buffer++); \ - c1 = GETJOCTET(*buffer); \ + c0 = *buffer++; \ + c1 = *buffer; \ /* Pre-execute most common case */ \ get_buffer = (get_buffer << 8) | c0; \ bits_left += 8; \ @@ -423,7 +406,7 @@ no_more_bytes: } \ } -#if SIZEOF_SIZE_T == 8 || defined(_WIN64) +#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__)) /* Pre-fetch 48 bytes, because the holding register is 64-bit */ #define FILL_BIT_BUFFER_FAST \ @@ -557,6 +540,12 @@ process_restart(j_decompress_ptr cinfo) } +#if defined(__has_feature) +#if __has_feature(undefined_behavior_sanitizer) +__attribute__((no_sanitize("signed-integer-overflow"), + no_sanitize("unsigned-integer-overflow"))) +#endif +#endif LOCAL(boolean) decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { @@ -568,7 +557,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ BITREAD_LOAD_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(state, entropy->saved); + state = entropy->saved; for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; @@ -589,11 +578,15 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (entropy->dc_needed[blkn]) { /* Convert DC difference to actual value, update last_dc_val */ int ci = cinfo->MCU_membership[blkn]; - /* This is really just - * s += state.last_dc_val[ci]; - * It is written this way in order to shut up UBSan. + /* Certain malformed JPEG images produce repeated DC coefficient + * differences of 2047 or -2047, which causes state.last_dc_val[ci] to + * grow until it overflows or underflows a 32-bit signed integer. This + * behavior is, to the best of our understanding, innocuous, and it is + * unclear how to work around it without potentially affecting + * performance. Thus, we (hopefully temporarily) suppress UBSan integer + * overflow errors for this function. */ - s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]); + s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; if (block) { /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ @@ -653,7 +646,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Completed MCU, so update state */ BITREAD_SAVE_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); + entropy->saved = state; return TRUE; } @@ -671,7 +664,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ BITREAD_LOAD_STATE(cinfo, entropy->bitstate); buffer = (JOCTET *)br_state.next_input_byte; - ASSIGN_STATE(state, entropy->saved); + state = entropy->saved; for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; @@ -688,7 +681,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (entropy->dc_needed[blkn]) { int ci = cinfo->MCU_membership[blkn]; - s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]); + s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; if (block) (*block)[0] = (JCOEF)s; @@ -740,7 +733,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte); br_state.next_input_byte = buffer; BITREAD_SAVE_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); + entropy->saved = state; return TRUE; } @@ -795,7 +788,8 @@ use_slow: } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } diff --git a/3rdparty/libjpeg-turbo/src/jdhuff.h b/3rdparty/libjpeg-turbo/src/jdhuff.h index 6a8d90f402..cfa0b7f558 100644 --- a/3rdparty/libjpeg-turbo/src/jdhuff.h +++ b/3rdparty/libjpeg-turbo/src/jdhuff.h @@ -4,7 +4,8 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010-2011, 2015-2016, D. R. Commander. + * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander. + * Copyright (C) 2018, Matthias Räncker. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, typedef size_t bit_buf_type; /* type of bit-extraction buffer */ #define BIT_BUF_SIZE 64 /* size of buffer in bits */ +#elif defined(__x86_64__) && defined(__ILP32__) + +typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */ +#define BIT_BUF_SIZE 64 /* size of buffer in bits */ + #else typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */ @@ -228,7 +234,10 @@ slowlabel: \ s |= GET_BITS(1); \ nb++; \ } \ - s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \ + if (nb > 16) \ + s = 0; \ + else \ + s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \ } /* Out-of-line case for Huffman code fetching */ diff --git a/3rdparty/libjpeg-turbo/src/jdicc.c b/3rdparty/libjpeg-turbo/src/jdicc.c index 7224695816..a1a5b867ae 100644 --- a/3rdparty/libjpeg-turbo/src/jdicc.c +++ b/3rdparty/libjpeg-turbo/src/jdicc.c @@ -38,18 +38,18 @@ marker_is_icc(jpeg_saved_marker_ptr marker) marker->marker == ICC_MARKER && marker->data_length >= ICC_OVERHEAD_LEN && /* verify the identifying string */ - GETJOCTET(marker->data[0]) == 0x49 && - GETJOCTET(marker->data[1]) == 0x43 && - GETJOCTET(marker->data[2]) == 0x43 && - GETJOCTET(marker->data[3]) == 0x5F && - GETJOCTET(marker->data[4]) == 0x50 && - GETJOCTET(marker->data[5]) == 0x52 && - GETJOCTET(marker->data[6]) == 0x4F && - GETJOCTET(marker->data[7]) == 0x46 && - GETJOCTET(marker->data[8]) == 0x49 && - GETJOCTET(marker->data[9]) == 0x4C && - GETJOCTET(marker->data[10]) == 0x45 && - GETJOCTET(marker->data[11]) == 0x0; + marker->data[0] == 0x49 && + marker->data[1] == 0x43 && + marker->data[2] == 0x43 && + marker->data[3] == 0x5F && + marker->data[4] == 0x50 && + marker->data[5] == 0x52 && + marker->data[6] == 0x4F && + marker->data[7] == 0x46 && + marker->data[8] == 0x49 && + marker->data[9] == 0x4C && + marker->data[10] == 0x45 && + marker->data[11] == 0x0; } @@ -102,12 +102,12 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr, for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) { if (marker_is_icc(marker)) { if (num_markers == 0) - num_markers = GETJOCTET(marker->data[13]); - else if (num_markers != GETJOCTET(marker->data[13])) { + num_markers = marker->data[13]; + else if (num_markers != marker->data[13]) { WARNMS(cinfo, JWRN_BOGUS_ICC); /* inconsistent num_markers fields */ return FALSE; } - seq_no = GETJOCTET(marker->data[12]); + seq_no = marker->data[12]; if (seq_no <= 0 || seq_no > num_markers) { WARNMS(cinfo, JWRN_BOGUS_ICC); /* bogus sequence number */ return FALSE; @@ -154,7 +154,7 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr, JOCTET FAR *src_ptr; JOCTET *dst_ptr; unsigned int length; - seq_no = GETJOCTET(marker->data[12]); + seq_no = marker->data[12]; dst_ptr = icc_data + data_offset[seq_no]; src_ptr = marker->data + ICC_OVERHEAD_LEN; length = data_length[seq_no]; diff --git a/3rdparty/libjpeg-turbo/src/jdmarker.c b/3rdparty/libjpeg-turbo/src/jdmarker.c index c9c7ef6399..b964c3a1a6 100644 --- a/3rdparty/libjpeg-turbo/src/jdmarker.c +++ b/3rdparty/libjpeg-turbo/src/jdmarker.c @@ -151,7 +151,7 @@ typedef my_marker_reader *my_marker_ptr; #define INPUT_BYTE(cinfo, V, action) \ MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \ bytes_in_buffer--; \ - V = GETJOCTET(*next_input_byte++); ) + V = *next_input_byte++; ) /* As above, but read two bytes interpreted as an unsigned 16-bit integer. * V should be declared unsigned int or perhaps JLONG. @@ -159,10 +159,10 @@ typedef my_marker_reader *my_marker_ptr; #define INPUT_2BYTES(cinfo, V, action) \ MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \ bytes_in_buffer--; \ - V = ((unsigned int)GETJOCTET(*next_input_byte++)) << 8; \ + V = ((unsigned int)(*next_input_byte++)) << 8; \ MAKE_BYTE_AVAIL(cinfo, action); \ bytes_in_buffer--; \ - V += GETJOCTET(*next_input_byte++); ) + V += *next_input_byte++; ) /* @@ -608,18 +608,18 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, JLONG totallen = (JLONG)datalen + remaining; if (datalen >= APP0_DATA_LEN && - GETJOCTET(data[0]) == 0x4A && - GETJOCTET(data[1]) == 0x46 && - GETJOCTET(data[2]) == 0x49 && - GETJOCTET(data[3]) == 0x46 && - GETJOCTET(data[4]) == 0) { + data[0] == 0x4A && + data[1] == 0x46 && + data[2] == 0x49 && + data[3] == 0x46 && + data[4] == 0) { /* Found JFIF APP0 marker: save info */ cinfo->saw_JFIF_marker = TRUE; - cinfo->JFIF_major_version = GETJOCTET(data[5]); - cinfo->JFIF_minor_version = GETJOCTET(data[6]); - cinfo->density_unit = GETJOCTET(data[7]); - cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]); - cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]); + cinfo->JFIF_major_version = data[5]; + cinfo->JFIF_minor_version = data[6]; + cinfo->density_unit = data[7]; + cinfo->X_density = (data[8] << 8) + data[9]; + cinfo->Y_density = (data[10] << 8) + data[11]; /* Check version. * Major version must be 1, anything else signals an incompatible change. * (We used to treat this as an error, but now it's a nonfatal warning, @@ -634,24 +634,22 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, cinfo->JFIF_major_version, cinfo->JFIF_minor_version, cinfo->X_density, cinfo->Y_density, cinfo->density_unit); /* Validate thumbnail dimensions and issue appropriate messages */ - if (GETJOCTET(data[12]) | GETJOCTET(data[13])) - TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, - GETJOCTET(data[12]), GETJOCTET(data[13])); + if (data[12] | data[13]) + TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, data[12], data[13]); totallen -= APP0_DATA_LEN; - if (totallen != - ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG)3)) + if (totallen != ((JLONG)data[12] * (JLONG)data[13] * (JLONG)3)) TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen); } else if (datalen >= 6 && - GETJOCTET(data[0]) == 0x4A && - GETJOCTET(data[1]) == 0x46 && - GETJOCTET(data[2]) == 0x58 && - GETJOCTET(data[3]) == 0x58 && - GETJOCTET(data[4]) == 0) { + data[0] == 0x4A && + data[1] == 0x46 && + data[2] == 0x58 && + data[3] == 0x58 && + data[4] == 0) { /* Found JFIF "JFXX" extension APP0 marker */ /* The library doesn't actually do anything with these, * but we try to produce a helpful trace message. */ - switch (GETJOCTET(data[5])) { + switch (data[5]) { case 0x10: TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen); break; @@ -662,8 +660,7 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen); break; default: - TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, - GETJOCTET(data[5]), (int)totallen); + TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, data[5], (int)totallen); break; } } else { @@ -684,16 +681,16 @@ examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, unsigned int version, flags0, flags1, transform; if (datalen >= APP14_DATA_LEN && - GETJOCTET(data[0]) == 0x41 && - GETJOCTET(data[1]) == 0x64 && - GETJOCTET(data[2]) == 0x6F && - GETJOCTET(data[3]) == 0x62 && - GETJOCTET(data[4]) == 0x65) { + data[0] == 0x41 && + data[1] == 0x64 && + data[2] == 0x6F && + data[3] == 0x62 && + data[4] == 0x65) { /* Found Adobe APP14 marker */ - version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]); - flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]); - flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]); - transform = GETJOCTET(data[11]); + version = (data[5] << 8) + data[6]; + flags0 = (data[7] << 8) + data[8]; + flags1 = (data[9] << 8) + data[10]; + transform = data[11]; TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform); cinfo->saw_Adobe_marker = TRUE; cinfo->Adobe_transform = (UINT8)transform; diff --git a/3rdparty/libjpeg-turbo/src/jdmaster.c b/3rdparty/libjpeg-turbo/src/jdmaster.c index b20906438e..cbc8774b1f 100644 --- a/3rdparty/libjpeg-turbo/src/jdmaster.c +++ b/3rdparty/libjpeg-turbo/src/jdmaster.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 2002-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2016, D. R. Commander. + * Copyright (C) 2009-2011, 2016, 2019, D. R. Commander. * Copyright (C) 2013, Linaro Limited. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg @@ -22,7 +22,6 @@ #include "jpeglib.h" #include "jpegcomp.h" #include "jdmaster.h" -#include "jsimd.h" /* @@ -70,17 +69,6 @@ use_merged_upsample(j_decompress_ptr cinfo) cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size || cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size) return FALSE; -#ifdef WITH_SIMD - /* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling - isn't, then disabling merged upsampling is likely to be faster when - decompressing YCbCr JPEG images. */ - if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() && - jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr && - (cinfo->out_color_space == JCS_RGB || - (cinfo->out_color_space >= JCS_EXT_RGB && - cinfo->out_color_space <= JCS_EXT_ARGB))) - return FALSE; -#endif /* ??? also need to test for upsample-time rescaling, when & if supported */ return TRUE; /* by golly, it'll work... */ #else @@ -580,6 +568,7 @@ master_selection(j_decompress_ptr cinfo) */ cinfo->master->first_iMCU_col = 0; cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1; + cinfo->master->last_good_iMCU_row = 0; #ifdef D_MULTISCAN_FILES_SUPPORTED /* If jpeg_start_decompress will read the whole file, initialize diff --git a/3rdparty/libjpeg-turbo/src/jdmrg565.c b/3rdparty/libjpeg-turbo/src/jdmrg565.c index 53f1e16700..980a4e216e 100644 --- a/3rdparty/libjpeg-turbo/src/jdmrg565.c +++ b/3rdparty/libjpeg-turbo/src/jdmrg565.c @@ -43,20 +43,20 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each pair of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -68,12 +68,12 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); + y = *inptr0; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -115,21 +115,21 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* Loop for each pair of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; @@ -142,12 +142,12 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); + y = *inptr0; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; @@ -189,20 +189,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each group of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -211,13 +211,13 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, WRITE_TWO_PIXELS(outptr0, rgb); outptr0 += 4; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -229,20 +229,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); + y = *inptr00; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr0 = (INT16)rgb; - y = GETJSAMPLE(*inptr01); + y = *inptr01; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -287,21 +287,21 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* Loop for each group of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; @@ -311,14 +311,14 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, WRITE_TWO_PIXELS(outptr0, rgb); outptr0 += 4; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[DITHER_565_R(y + cred, d1)]; g = range_limit[DITHER_565_G(y + cgreen, d1)]; b = range_limit[DITHER_565_B(y + cblue, d1)]; d1 = DITHER_ROTATE(d1); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[DITHER_565_R(y + cred, d1)]; g = range_limit[DITHER_565_G(y + cgreen, d1)]; b = range_limit[DITHER_565_B(y + cblue, d1)]; @@ -331,20 +331,20 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); + y = *inptr00; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr0 = (INT16)rgb; - y = GETJSAMPLE(*inptr01); + y = *inptr01; r = range_limit[DITHER_565_R(y + cred, d1)]; g = range_limit[DITHER_565_G(y + cgreen, d1)]; b = range_limit[DITHER_565_B(y + cblue, d1)]; diff --git a/3rdparty/libjpeg-turbo/src/jdmrgext.c b/3rdparty/libjpeg-turbo/src/jdmrgext.c index c9a44d8219..9bf4f1a307 100644 --- a/3rdparty/libjpeg-turbo/src/jdmrgext.c +++ b/3rdparty/libjpeg-turbo/src/jdmrgext.c @@ -46,13 +46,13 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each pair of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; @@ -60,7 +60,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr[RGB_ALPHA] = 0xFF; #endif outptr += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; @@ -71,12 +71,12 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, } /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); + y = *inptr0; outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; @@ -120,13 +120,13 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each group of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; @@ -134,7 +134,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr0[RGB_ALPHA] = 0xFF; #endif outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; @@ -142,7 +142,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr0[RGB_ALPHA] = 0xFF; #endif outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; @@ -150,7 +150,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr1[RGB_ALPHA] = 0xFF; #endif outptr1 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; @@ -161,19 +161,19 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, } /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); + y = *inptr00; outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; #ifdef RGB_ALPHA outptr0[RGB_ALPHA] = 0xFF; #endif - y = GETJSAMPLE(*inptr01); + y = *inptr01; outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; diff --git a/3rdparty/libjpeg-turbo/src/jdphuff.c b/3rdparty/libjpeg-turbo/src/jdphuff.c index 9e82636bbd..c6d82ca14b 100644 --- a/3rdparty/libjpeg-turbo/src/jdphuff.c +++ b/3rdparty/libjpeg-turbo/src/jdphuff.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2016, 2018, D. R. Commander. + * Copyright (C) 2015-2016, 2018-2021, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -41,25 +41,6 @@ typedef struct { int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. - */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest, src) ((dest) = (src)) -#else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest, src) \ - ((dest).EOBRUN = (src).EOBRUN, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) -#endif -#endif - - typedef struct { struct jpeg_entropy_decoder pub; /* public fields */ @@ -102,7 +83,7 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo) boolean is_DC_band, bad; int ci, coefi, tbl; d_derived_tbl **pdtbl; - int *coef_bit_ptr; + int *coef_bit_ptr, *prev_coef_bit_ptr; jpeg_component_info *compptr; is_DC_band = (cinfo->Ss == 0); @@ -143,8 +124,15 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo) for (ci = 0; ci < cinfo->comps_in_scan; ci++) { int cindex = cinfo->cur_comp_info[ci]->component_index; coef_bit_ptr = &cinfo->coef_bits[cindex][0]; + prev_coef_bit_ptr = &cinfo->coef_bits[cindex + cinfo->num_components][0]; if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) { + if (cinfo->input_scan_number > 1) + prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi]; + else + prev_coef_bit_ptr[coefi] = 0; + } for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; if (cinfo->Ah != expected) @@ -323,7 +311,7 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ BITREAD_LOAD_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(state, entropy->saved); + state = entropy->saved; /* Outer loop handles each block in the MCU */ @@ -356,11 +344,12 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Completed MCU, so update state */ BITREAD_SAVE_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); + entropy->saved = state; } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } @@ -444,7 +433,8 @@ decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } @@ -495,7 +485,8 @@ decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) BITREAD_SAVE_STATE(cinfo, entropy->bitstate); /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } @@ -638,7 +629,8 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; @@ -676,7 +668,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo) /* Create progression status table */ cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, - cinfo->num_components * DCTSIZE2 * + cinfo->num_components * 2 * DCTSIZE2 * sizeof(int)); coef_bit_ptr = &cinfo->coef_bits[0][0]; for (ci = 0; ci < cinfo->num_components; ci++) diff --git a/3rdparty/libjpeg-turbo/src/jdsample.c b/3rdparty/libjpeg-turbo/src/jdsample.c index 50a68b3013..eaad72a030 100644 --- a/3rdparty/libjpeg-turbo/src/jdsample.c +++ b/3rdparty/libjpeg-turbo/src/jdsample.c @@ -8,7 +8,7 @@ * Copyright (C) 2010, 2015-2016, D. R. Commander. * Copyright (C) 2014, MIPS Technologies, Inc., California. * Copyright (C) 2015, Google, Inc. - * Copyright (C) 2019, Arm Limited. + * Copyright (C) 2019-2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -177,7 +177,7 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; for (h = h_expand; h > 0; h--) { *outptr++ = invalue; } @@ -213,7 +213,7 @@ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[inrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; *outptr++ = invalue; *outptr++ = invalue; } @@ -242,7 +242,7 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; *outptr++ = invalue; *outptr++ = invalue; } @@ -283,20 +283,20 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, inptr = input_data[inrow]; outptr = output_data[inrow]; /* Special case for first column */ - invalue = GETJSAMPLE(*inptr++); + invalue = *inptr++; *outptr++ = (JSAMPLE)invalue; - *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2); + *outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2); for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { /* General case: 3/4 * nearer pixel + 1/4 * further pixel */ - invalue = GETJSAMPLE(*inptr++) * 3; - *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2); - *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(*inptr) + 2) >> 2); + invalue = (*inptr++) * 3; + *outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2); + *outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2); } /* Special case for last column */ - invalue = GETJSAMPLE(*inptr); - *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2); + invalue = *inptr; + *outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2); *outptr++ = (JSAMPLE)invalue; } } @@ -338,7 +338,7 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow++]; for (colctr = 0; colctr < compptr->downsampled_width; colctr++) { - thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + thiscolsum = (*inptr0++) * 3 + (*inptr1++); *outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2); } } @@ -381,8 +381,8 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow++]; /* Special case for first column */ - thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + thiscolsum = (*inptr0++) * 3 + (*inptr1++); + nextcolsum = (*inptr0++) * 3 + (*inptr1++); *outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4); *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4); lastcolsum = thiscolsum; thiscolsum = nextcolsum; @@ -390,7 +390,7 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + nextcolsum = (*inptr0++) * 3 + (*inptr1++); *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4); *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4); lastcolsum = thiscolsum; thiscolsum = nextcolsum; @@ -477,7 +477,13 @@ jinit_upsampler(j_decompress_ptr cinfo) } else if (h_in_group == h_out_group && v_in_group * 2 == v_out_group && do_fancy) { /* Non-fancy upsampling is handled by the generic method */ - upsample->methods[ci] = h1v2_fancy_upsample; +#if defined(__arm__) || defined(__aarch64__) || \ + defined(_M_ARM) || defined(_M_ARM64) + if (jsimd_can_h1v2_fancy_upsample()) + upsample->methods[ci] = jsimd_h1v2_fancy_upsample; + else +#endif + upsample->methods[ci] = h1v2_fancy_upsample; upsample->pub.need_context_rows = TRUE; } else if (h_in_group * 2 == h_out_group && v_in_group * 2 == v_out_group) { diff --git a/3rdparty/libjpeg-turbo/src/jerror.h b/3rdparty/libjpeg-turbo/src/jerror.h index 933a3690fd..4476df2c93 100644 --- a/3rdparty/libjpeg-turbo/src/jerror.h +++ b/3rdparty/libjpeg-turbo/src/jerror.h @@ -207,6 +207,10 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") #endif #endif JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker") +#if JPEG_LIB_VERSION < 70 +JMESSAGE(JERR_BAD_DROP_SAMPLING, + "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") +#endif #ifdef JMAKE_ENUM_LIST @@ -252,6 +256,15 @@ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker") (cinfo)->err->msg_parm.i[2] = (p3), \ (cinfo)->err->msg_parm.i[3] = (p4), \ (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) +#define ERREXIT6(cinfo, code, p1, p2, p3, p4, p5, p6) \ + ((cinfo)->err->msg_code = (code), \ + (cinfo)->err->msg_parm.i[0] = (p1), \ + (cinfo)->err->msg_parm.i[1] = (p2), \ + (cinfo)->err->msg_parm.i[2] = (p3), \ + (cinfo)->err->msg_parm.i[3] = (p4), \ + (cinfo)->err->msg_parm.i[4] = (p5), \ + (cinfo)->err->msg_parm.i[5] = (p6), \ + (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) #define ERREXITS(cinfo, code, str) \ ((cinfo)->err->msg_code = (code), \ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ diff --git a/3rdparty/libjpeg-turbo/src/jidctint.c b/3rdparty/libjpeg-turbo/src/jidctint.c index 50f385da33..bb08748019 100644 --- a/3rdparty/libjpeg-turbo/src/jidctint.c +++ b/3rdparty/libjpeg-turbo/src/jidctint.c @@ -3,7 +3,7 @@ * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. - * Modification developed 2002-2009 by Guido Vollbeding. + * Modification developed 2002-2018 by Guido Vollbeding. * libjpeg-turbo Modifications: * Copyright (C) 2015, 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg @@ -417,7 +417,7 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, /* * Perform dequantization and inverse DCT on one block of coefficients, - * producing a 7x7 output block. + * producing a reduced-size 7x7 output block. * * Optimized algorithm with 12 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/14). @@ -1258,7 +1258,7 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr, /* * Perform dequantization and inverse DCT on one block of coefficients, - * producing a 11x11 output block. + * producing an 11x11 output block. * * Optimized algorithm with 24 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/22). @@ -2398,7 +2398,7 @@ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); /* Add fudge factor here for final descale. */ - tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1); + tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1); z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ diff --git a/3rdparty/libjpeg-turbo/src/jmorecfg.h b/3rdparty/libjpeg-turbo/src/jmorecfg.h index aa29f0f9f1..fb3a9cf411 100644 --- a/3rdparty/libjpeg-turbo/src/jmorecfg.h +++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h @@ -43,25 +43,11 @@ #if BITS_IN_JSAMPLE == 8 /* JSAMPLE should be the smallest type that will hold the values 0..255. - * You can use a signed char by having GETJSAMPLE mask it with 0xFF. */ -#ifdef HAVE_UNSIGNED_CHAR - typedef unsigned char JSAMPLE; #define GETJSAMPLE(value) ((int)(value)) -#else /* not HAVE_UNSIGNED_CHAR */ - -typedef char JSAMPLE; -#ifdef __CHAR_UNSIGNED__ -#define GETJSAMPLE(value) ((int)(value)) -#else -#define GETJSAMPLE(value) ((int)(value) & 0xFF) -#endif /* __CHAR_UNSIGNED__ */ - -#endif /* HAVE_UNSIGNED_CHAR */ - #define MAXJSAMPLE 255 #define CENTERJSAMPLE 128 @@ -97,22 +83,9 @@ typedef short JCOEF; * managers, this is also the data type passed to fread/fwrite. */ -#ifdef HAVE_UNSIGNED_CHAR - typedef unsigned char JOCTET; #define GETJOCTET(value) (value) -#else /* not HAVE_UNSIGNED_CHAR */ - -typedef char JOCTET; -#ifdef __CHAR_UNSIGNED__ -#define GETJOCTET(value) (value) -#else -#define GETJOCTET(value) ((value) & 0xFF) -#endif /* __CHAR_UNSIGNED__ */ - -#endif /* HAVE_UNSIGNED_CHAR */ - /* These typedefs are used for various table entries and so forth. * They must be at least as wide as specified; but making them too big @@ -123,15 +96,7 @@ typedef char JOCTET; /* UINT8 must hold at least the values 0..255. */ -#ifdef HAVE_UNSIGNED_CHAR typedef unsigned char UINT8; -#else /* not HAVE_UNSIGNED_CHAR */ -#ifdef __CHAR_UNSIGNED__ -typedef char UINT8; -#else /* not __CHAR_UNSIGNED__ */ -typedef short UINT8; -#endif /* __CHAR_UNSIGNED__ */ -#endif /* HAVE_UNSIGNED_CHAR */ /* UINT16 must hold at least the values 0..65535. */ diff --git a/3rdparty/libjpeg-turbo/src/jpegint.h b/3rdparty/libjpeg-turbo/src/jpegint.h index ad36ca8b56..195fbcb9b6 100644 --- a/3rdparty/libjpeg-turbo/src/jpegint.h +++ b/3rdparty/libjpeg-turbo/src/jpegint.h @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2016, D. R. Commander. + * Copyright (C) 2015-2016, 2019, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -158,6 +158,9 @@ struct jpeg_decomp_master { JDIMENSION first_MCU_col[MAX_COMPONENTS]; JDIMENSION last_MCU_col[MAX_COMPONENTS]; boolean jinit_upsampler_no_alloc; + + /* Last iMCU row that was successfully decoded */ + JDIMENSION last_good_iMCU_row; }; /* Input control module */ diff --git a/3rdparty/libjpeg-turbo/src/jquant1.c b/3rdparty/libjpeg-turbo/src/jquant1.c index 40bbb28cc7..73b83e16e5 100644 --- a/3rdparty/libjpeg-turbo/src/jquant1.c +++ b/3rdparty/libjpeg-turbo/src/jquant1.c @@ -479,7 +479,7 @@ color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf, for (col = width; col > 0; col--) { pixcode = 0; for (ci = 0; ci < nc; ci++) { - pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]); + pixcode += colorindex[ci][*ptrin++]; } *ptrout++ = (JSAMPLE)pixcode; } @@ -506,9 +506,9 @@ color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf, ptrin = input_buf[row]; ptrout = output_buf[row]; for (col = width; col > 0; col--) { - pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]); - pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]); - pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]); + pixcode = colorindex0[*ptrin++]; + pixcode += colorindex1[*ptrin++]; + pixcode += colorindex2[*ptrin++]; *ptrout++ = (JSAMPLE)pixcode; } } @@ -552,7 +552,7 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, * required amount of padding. */ *output_ptr += - colorindex_ci[GETJSAMPLE(*input_ptr) + dither[col_index]]; + colorindex_ci[*input_ptr + dither[col_index]]; input_ptr += nc; output_ptr++; col_index = (col_index + 1) & ODITHER_MASK; @@ -595,12 +595,9 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, col_index = 0; for (col = width; col > 0; col--) { - pixcode = - GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + dither0[col_index]]); - pixcode += - GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + dither1[col_index]]); - pixcode += - GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + dither2[col_index]]); + pixcode = colorindex0[(*input_ptr++) + dither0[col_index]]; + pixcode += colorindex1[(*input_ptr++) + dither1[col_index]]; + pixcode += colorindex2[(*input_ptr++) + dither2[col_index]]; *output_ptr++ = (JSAMPLE)pixcode; col_index = (col_index + 1) & ODITHER_MASK; } @@ -677,15 +674,15 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, * The maximum error is +- MAXJSAMPLE; this sets the required size * of the range_limit array. */ - cur += GETJSAMPLE(*input_ptr); - cur = GETJSAMPLE(range_limit[cur]); + cur += *input_ptr; + cur = range_limit[cur]; /* Select output value, accumulate into output code for this pixel */ - pixcode = GETJSAMPLE(colorindex_ci[cur]); + pixcode = colorindex_ci[cur]; *output_ptr += (JSAMPLE)pixcode; /* Compute actual representation error at this pixel */ /* Note: we can do this even though we don't have the final */ /* pixel code, because the colormap is orthogonal. */ - cur -= GETJSAMPLE(colormap_ci[pixcode]); + cur -= colormap_ci[pixcode]; /* Compute error fractions to be propagated to adjacent pixels. * Add these into the running sums, and simultaneously shift the * next-line error sums left by 1 column. diff --git a/3rdparty/libjpeg-turbo/src/jquant2.c b/3rdparty/libjpeg-turbo/src/jquant2.c index 6570613bb9..44efb18cad 100644 --- a/3rdparty/libjpeg-turbo/src/jquant2.c +++ b/3rdparty/libjpeg-turbo/src/jquant2.c @@ -215,9 +215,9 @@ prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf, ptr = input_buf[row]; for (col = width; col > 0; col--) { /* get pixel value and index into the histogram */ - histp = &histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT] - [GETJSAMPLE(ptr[1]) >> C1_SHIFT] - [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; + histp = &histogram[ptr[0] >> C0_SHIFT] + [ptr[1] >> C1_SHIFT] + [ptr[2] >> C2_SHIFT]; /* increment, check for overflow and undo increment if so. */ if (++(*histp) <= 0) (*histp)--; @@ -665,7 +665,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, for (i = 0; i < numcolors; i++) { /* We compute the squared-c0-distance term, then add in the other two. */ - x = GETJSAMPLE(cinfo->colormap[0][i]); + x = cinfo->colormap[0][i]; if (x < minc0) { tdist = (x - minc0) * C0_SCALE; min_dist = tdist * tdist; @@ -688,7 +688,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, } } - x = GETJSAMPLE(cinfo->colormap[1][i]); + x = cinfo->colormap[1][i]; if (x < minc1) { tdist = (x - minc1) * C1_SCALE; min_dist += tdist * tdist; @@ -710,7 +710,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, } } - x = GETJSAMPLE(cinfo->colormap[2][i]); + x = cinfo->colormap[2][i]; if (x < minc2) { tdist = (x - minc2) * C2_SCALE; min_dist += tdist * tdist; @@ -788,13 +788,13 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, #define STEP_C2 ((1 << C2_SHIFT) * C2_SCALE) for (i = 0; i < numcolors; i++) { - icolor = GETJSAMPLE(colorlist[i]); + icolor = colorlist[i]; /* Compute (square of) distance from minc0/c1/c2 to this color */ - inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE; + inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE; dist0 = inc0 * inc0; - inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE; + inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE; dist0 += inc1 * inc1; - inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE; + inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE; dist0 += inc2 * inc2; /* Form the initial difference increments */ inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0; @@ -879,7 +879,7 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2) for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) { cachep = &histogram[c0 + ic0][c1 + ic1][c2]; for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) { - *cachep++ = (histcell)(GETJSAMPLE(*cptr++) + 1); + *cachep++ = (histcell)((*cptr++) + 1); } } } @@ -909,9 +909,9 @@ pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, outptr = output_buf[row]; for (col = width; col > 0; col--) { /* get pixel value and index into the cache */ - c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT; - c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT; - c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT; + c0 = (*inptr++) >> C0_SHIFT; + c1 = (*inptr++) >> C1_SHIFT; + c2 = (*inptr++) >> C2_SHIFT; cachep = &histogram[c0][c1][c2]; /* If we have not seen this color before, find nearest colormap entry */ /* and update the cache */ @@ -996,12 +996,12 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, * The maximum error is +- MAXJSAMPLE (or less with error limiting); * this sets the required size of the range_limit array. */ - cur0 += GETJSAMPLE(inptr[0]); - cur1 += GETJSAMPLE(inptr[1]); - cur2 += GETJSAMPLE(inptr[2]); - cur0 = GETJSAMPLE(range_limit[cur0]); - cur1 = GETJSAMPLE(range_limit[cur1]); - cur2 = GETJSAMPLE(range_limit[cur2]); + cur0 += inptr[0]; + cur1 += inptr[1]; + cur2 += inptr[2]; + cur0 = range_limit[cur0]; + cur1 = range_limit[cur1]; + cur2 = range_limit[cur2]; /* Index into the cache with adjusted pixel value */ cachep = &histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT]; @@ -1015,9 +1015,9 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, register int pixcode = *cachep - 1; *outptr = (JSAMPLE)pixcode; /* Compute representation error for this pixel */ - cur0 -= GETJSAMPLE(colormap0[pixcode]); - cur1 -= GETJSAMPLE(colormap1[pixcode]); - cur2 -= GETJSAMPLE(colormap2[pixcode]); + cur0 -= colormap0[pixcode]; + cur1 -= colormap1[pixcode]; + cur2 -= colormap2[pixcode]; } /* Compute error fractions to be propagated to adjacent pixels. * Add these into the running sums, and simultaneously shift the diff --git a/3rdparty/libjpeg-turbo/src/jsimd.h b/3rdparty/libjpeg-turbo/src/jsimd.h index 51e2b8c89d..6c203655ef 100644 --- a/3rdparty/libjpeg-turbo/src/jsimd.h +++ b/3rdparty/libjpeg-turbo/src/jsimd.h @@ -4,6 +4,7 @@ * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2011, 2014, D. R. Commander. * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -75,6 +76,7 @@ EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo, EXTERN(int) jsimd_can_h2v2_fancy_upsample(void); EXTERN(int) jsimd_can_h2v1_fancy_upsample(void); +EXTERN(int) jsimd_can_h1v2_fancy_upsample(void); EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, @@ -84,6 +86,10 @@ EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(int) jsimd_can_h2v2_merged_upsample(void); EXTERN(int) jsimd_can_h2v1_merged_upsample(void); diff --git a/3rdparty/libjpeg-turbo/src/jsimd_none.c b/3rdparty/libjpeg-turbo/src/jsimd_none.c index 3cb6c80f8a..5b38a9fb5c 100644 --- a/3rdparty/libjpeg-turbo/src/jsimd_none.c +++ b/3rdparty/libjpeg-turbo/src/jsimd_none.c @@ -4,6 +4,7 @@ * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2009-2011, 2014, D. R. Commander. * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -169,6 +170,12 @@ jsimd_can_h2v1_fancy_upsample(void) return 0; } +GLOBAL(int) +jsimd_can_h1v2_fancy_upsample(void) +{ + return 0; +} + GLOBAL(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) @@ -181,6 +188,12 @@ jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, { } +GLOBAL(void) +jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + GLOBAL(int) jsimd_can_h2v2_merged_upsample(void) { diff --git a/3rdparty/libjpeg-turbo/src/jversion.h b/3rdparty/libjpeg-turbo/src/jversion.h index 4462b94104..2ab534af41 100644 --- a/3rdparty/libjpeg-turbo/src/jversion.h +++ b/3rdparty/libjpeg-turbo/src/jversion.h @@ -2,9 +2,9 @@ * jversion.h * * This file was part of the Independent JPEG Group's software: - * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. + * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2012-2020, D. R. Commander. + * Copyright (C) 2010, 2012-2021, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -37,9 +37,9 @@ */ #define JCOPYRIGHT \ - "Copyright (C) 2009-2020 D. R. Commander\n" \ + "Copyright (C) 2009-2021 D. R. Commander\n" \ "Copyright (C) 2015, 2020 Google, Inc.\n" \ - "Copyright (C) 2019 Arm Limited\n" \ + "Copyright (C) 2019-2020 Arm Limited\n" \ "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \ "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ "Copyright (C) 2015 Intel Corporation\n" \ @@ -48,7 +48,7 @@ "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \ "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ - "Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding" + "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding" #define JCOPYRIGHT_SHORT \ - "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others" + "Copyright (C) 1991-2021 The libjpeg-turbo Project and many others"