Merge branch 4.x

pull/20216/head
Alexander Alekhin 4 years ago
commit b91e0dca90
  1. 7
      3rdparty/libjpeg-turbo/CMakeLists.txt
  2. 2
      3rdparty/libjpeg-turbo/LICENSE.md
  3. 15
      3rdparty/libjpeg-turbo/README.ijg
  4. 2
      3rdparty/libjpeg-turbo/README.md
  5. 5
      3rdparty/libjpeg-turbo/jconfig.h.in
  6. 1
      3rdparty/libjpeg-turbo/jconfig.h.win.in
  7. 18
      3rdparty/libjpeg-turbo/src/jccolext.c
  8. 12
      3rdparty/libjpeg-turbo/src/jccolor.c
  9. 37
      3rdparty/libjpeg-turbo/src/jcdctmgr.c
  10. 386
      3rdparty/libjpeg-turbo/src/jchuff.c
  11. 25
      3rdparty/libjpeg-turbo/src/jcphuff.c
  12. 63
      3rdparty/libjpeg-turbo/src/jcsample.c
  13. 10
      3rdparty/libjpeg-turbo/src/jdapistd.c
  14. 15
      3rdparty/libjpeg-turbo/src/jdarith.c
  15. 290
      3rdparty/libjpeg-turbo/src/jdcoefct.c
  16. 3
      3rdparty/libjpeg-turbo/src/jdcoefct.h
  17. 96
      3rdparty/libjpeg-turbo/src/jdcol565.c
  18. 8
      3rdparty/libjpeg-turbo/src/jdcolext.c
  19. 14
      3rdparty/libjpeg-turbo/src/jdcolor.c
  20. 60
      3rdparty/libjpeg-turbo/src/jdhuff.c
  21. 13
      3rdparty/libjpeg-turbo/src/jdhuff.h
  22. 32
      3rdparty/libjpeg-turbo/src/jdicc.c
  23. 67
      3rdparty/libjpeg-turbo/src/jdmarker.c
  24. 15
      3rdparty/libjpeg-turbo/src/jdmaster.c
  25. 68
      3rdparty/libjpeg-turbo/src/jdmrg565.c
  26. 34
      3rdparty/libjpeg-turbo/src/jdmrgext.c
  27. 48
      3rdparty/libjpeg-turbo/src/jdphuff.c
  28. 38
      3rdparty/libjpeg-turbo/src/jdsample.c
  29. 13
      3rdparty/libjpeg-turbo/src/jerror.h
  30. 8
      3rdparty/libjpeg-turbo/src/jidctint.c
  31. 35
      3rdparty/libjpeg-turbo/src/jmorecfg.h
  32. 5
      3rdparty/libjpeg-turbo/src/jpegint.h
  33. 27
      3rdparty/libjpeg-turbo/src/jquant1.c
  34. 46
      3rdparty/libjpeg-turbo/src/jquant2.c
  35. 6
      3rdparty/libjpeg-turbo/src/jsimd.h
  36. 13
      3rdparty/libjpeg-turbo/src/jsimd_none.c
  37. 12
      3rdparty/libjpeg-turbo/src/jversion.h
  38. 27
      CMakeLists.txt
  39. 7
      cmake/OpenCVCompilerOptions.cmake
  40. 6
      cmake/OpenCVDetectInferenceEngine.cmake
  41. 2
      cmake/OpenCVDownload.cmake
  42. 37
      cmake/OpenCVFindLibsGUI.cmake
  43. 8
      cmake/OpenCVFindOpenEXR.cmake
  44. 1
      cmake/OpenCVMinDepVersions.cmake
  45. 13
      cmake/OpenCVModule.cmake
  46. 49
      cmake/OpenCVUtils.cmake
  47. 15
      cmake/templates/cvconfig.h.in
  48. 2
      doc/Doxyfile.in
  49. 18
      doc/opencv.bib
  50. 8
      doc/py_tutorials/py_feature2d/py_features_harris/py_features_harris.markdown
  51. 4
      doc/py_tutorials/py_feature2d/py_shi_tomasi/py_shi_tomasi.markdown
  52. 2
      doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown
  53. 11
      doc/tools/html_functions.py
  54. 2
      doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown
  55. 28
      doc/tutorials/introduction/config_reference/config_reference.markdown
  56. 31
      doc/tutorials/introduction/general_install/general_install.markdown
  57. 2
      doc/tutorials/introduction/linux_install/linux_install.markdown
  58. 27
      modules/3d/include/opencv2/3d.hpp
  59. 80
      modules/3d/src/ptsetreg.cpp
  60. 9
      modules/3d/src/solvepnp.cpp
  61. 22
      modules/3d/src/usac/ransac_solvers.cpp
  62. 22
      modules/3d/test/test_affine3d_estimator.cpp
  63. 19
      modules/3d/test/test_usac.cpp
  64. 21
      modules/calib/include/opencv2/calib.hpp
  65. 9
      modules/core/include/opencv2/core/cv_cpu_dispatch.h
  66. 6
      modules/core/include/opencv2/core/cvdef.h
  67. 6
      modules/core/include/opencv2/core/hal/intrin.hpp
  68. 244
      modules/core/include/opencv2/core/hal/intrin_neon.hpp
  69. 2545
      modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
  70. 17
      modules/core/include/opencv2/core/mat.hpp
  71. 91
      modules/core/include/opencv2/core/mat.inl.hpp
  72. 23
      modules/core/include/opencv2/core/ocl.hpp
  73. 26
      modules/core/include/opencv2/core/types.hpp
  74. 4
      modules/core/include/opencv2/core/utils/plugin_loader.private.hpp
  75. 1
      modules/core/misc/objc/common/Point2i.h
  76. 1
      modules/core/misc/objc/common/Rect2i.h
  77. 1
      modules/core/misc/objc/common/Size2i.h
  78. 11
      modules/core/misc/objc/common/Typealiases.swift
  79. 18
      modules/core/misc/objc/gen_dict.json
  80. 2
      modules/core/src/arithm.simd.hpp
  81. 968
      modules/core/src/directx.cpp
  82. 23
      modules/core/src/directx.hpp
  83. 42
      modules/core/src/glob.cpp
  84. 82
      modules/core/src/ocl.cpp
  85. 7
      modules/core/src/ocl_disabled.impl.hpp
  86. 2
      modules/core/src/precomp.hpp
  87. 43
      modules/core/src/system.cpp
  88. 19
      modules/core/src/utils/datafile.cpp
  89. 8
      modules/core/src/utils/samples.cpp
  90. 98
      modules/core/src/va_intel.cpp
  91. 67
      modules/core/test/ocl/test_opencl.cpp
  92. 21
      modules/core/test/test_intrin_utils.hpp
  93. 94
      modules/core/test/test_mat.cpp
  94. 4
      modules/core/test/test_utils.cpp
  95. 12
      modules/dnn/CMakeLists.txt
  96. 6
      modules/dnn/include/opencv2/dnn/dnn.hpp
  97. 20
      modules/dnn/src/ie_ngraph.cpp
  98. 5
      modules/dnn/src/layers/crop_and_resize_layer.cpp
  99. 45
      modules/features2d/include/opencv2/features2d.hpp
  100. 12
      modules/flann/include/opencv2/flann/any.h
  101. Some files were not shown because too many files have changed in this diff Show More

@ -3,10 +3,10 @@ project(${JPEG_LIBRARY} C)
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-64-to-32 -Wimplicit-fallthrough)
set(VERSION_MAJOR 2)
set(VERSION_MINOR 0)
set(VERSION_REVISION 6)
set(VERSION_MINOR 1)
set(VERSION_REVISION 0)
set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
set(LIBJPEG_TURBO_VERSION_NUMBER 2000006)
set(LIBJPEG_TURBO_VERSION_NUMBER 2001000)
string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@ -46,7 +46,6 @@ if(UNIX)
ocv_update(HAVE_UNSIGNED_SHORT 1)
# undef INCOMPLETE_TYPES_BROKEN
ocv_update(RIGHT_SHIFT_IS_UNSIGNED 0)
ocv_update(__CHAR_UNSIGNED__ 0)
endif()

@ -91,7 +91,7 @@ best of our understanding.
The Modified (3-clause) BSD License
===================================
Copyright (C)2009-2020 D. R. Commander. All Rights Reserved.
Copyright (C)2009-2021 D. R. Commander. All Rights Reserved.<br>
Copyright (C)2015 Viktor Szathmáry. All Rights Reserved.
Redistribution and use in source and binary forms, with or without

@ -128,7 +128,7 @@ with respect to this software, its quality, accuracy, merchantability, or
fitness for a particular purpose. This software is provided "AS IS", and you,
its user, assume the entire risk as to its quality and accuracy.
This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding.
This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
All Rights Reserved except as specified below.
Permission is hereby granted to use, copy, modify, and distribute this
@ -159,19 +159,6 @@ commercial products, provided that all warranty or liability claims are
assumed by the product vendor.
The IJG distribution formerly included code to read and write GIF files.
To avoid entanglement with the Unisys LZW patent (now expired), GIF reading
support has been removed altogether, and the GIF writer has been simplified
to produce "uncompressed GIFs". This technique does not use the LZW
algorithm; the resulting GIF files are larger than usual, but are readable
by all standard GIF decoders.
We are required to state that
"The Graphics Interchange Format(c) is the Copyright property of
CompuServe Incorporated. GIF(sm) is a Service Mark property of
CompuServe Incorporated."
REFERENCES
==========

@ -3,7 +3,7 @@ Background
libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and
MIPS systems, as well as progressive JPEG compression on x86 and x86-64
MIPS systems, as well as progressive JPEG compression on x86, x86-64, and Arm
systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
all else being equal. On other types of systems, libjpeg-turbo can still
outperform libjpeg by a significant amount, by virtue of its highly-optimized

@ -61,11 +61,6 @@
unsigned. */
#cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
/* Define to 1 if type `char' is unsigned and you are not using gcc. */
#ifndef __CHAR_UNSIGNED__
#cmakedefine __CHAR_UNSIGNED__ 1
#endif
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */

@ -18,7 +18,6 @@
#define HAVE_UNSIGNED_SHORT
#undef INCOMPLETE_TYPES_BROKEN
#undef RIGHT_SHIFT_IS_UNSIGNED
#undef __CHAR_UNSIGNED__
/* Define "boolean" as unsigned char, not int, per Windows custom */
#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */

@ -48,9 +48,9 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr2 = output_buf[2][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
r = GETJSAMPLE(inptr[RGB_RED]);
g = GETJSAMPLE(inptr[RGB_GREEN]);
b = GETJSAMPLE(inptr[RGB_BLUE]);
r = inptr[RGB_RED];
g = inptr[RGB_GREEN];
b = inptr[RGB_BLUE];
inptr += RGB_PIXELSIZE;
/* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
* must be too; we do not need an explicit range-limiting operation.
@ -100,9 +100,9 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr = output_buf[0][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
r = GETJSAMPLE(inptr[RGB_RED]);
g = GETJSAMPLE(inptr[RGB_GREEN]);
b = GETJSAMPLE(inptr[RGB_BLUE]);
r = inptr[RGB_RED];
g = inptr[RGB_GREEN];
b = inptr[RGB_BLUE];
inptr += RGB_PIXELSIZE;
/* Y */
outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
@ -135,9 +135,9 @@ rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr2 = output_buf[2][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
outptr0[col] = GETJSAMPLE(inptr[RGB_RED]);
outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]);
outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]);
outptr0[col] = inptr[RGB_RED];
outptr1[col] = inptr[RGB_GREEN];
outptr2[col] = inptr[RGB_BLUE];
inptr += RGB_PIXELSIZE;
}
}

@ -392,11 +392,11 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr3 = output_buf[3][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
r = MAXJSAMPLE - inptr[0];
g = MAXJSAMPLE - inptr[1];
b = MAXJSAMPLE - inptr[2];
/* K passes through as-is */
outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */
outptr3[col] = inptr[3];
inptr += 4;
/* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
* must be too; we do not need an explicit range-limiting operation.
@ -438,7 +438,7 @@ grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
outptr = output_buf[0][output_row];
output_row++;
for (col = 0; col < num_cols; col++) {
outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */
outptr[col] = inptr[0];
inptr += instride;
}
}
@ -497,7 +497,7 @@ null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
inptr = *input_buf;
outptr = output_buf[ci][output_row];
for (col = 0; col < num_cols; col++) {
outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
outptr[col] = inptr[ci];
inptr += nc;
}
}

@ -381,19 +381,19 @@ convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
#else
{
register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--)
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
*workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
}
#endif
}
@ -533,20 +533,19 @@ convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
for (elemr = 0; elemr < DCTSIZE; elemr++) {
elemptr = sample_data[elemr] + start_col;
#if DCTSIZE == 8 /* unroll the inner loop */
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
#else
{
register int elemc;
for (elemc = DCTSIZE; elemc > 0; elemc--)
*workspaceptr++ = (FAST_FLOAT)
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
*workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
}
#endif
}

@ -4,8 +4,10 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
* Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander.
* Copyright (C) 2015, Matthieu Darbois.
* Copyright (C) 2018, Matthias Räncker.
* Copyright (C) 2020, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -42,15 +44,19 @@
* flags (this defines __thumb__).
*/
/* NOTE: Both GCC and Clang define __GNUC__ */
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif
#ifdef USE_CLZ_INTRINSIC
#if defined(_MSC_VER) && !defined(__clang__)
#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
#else
#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
#endif
#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
#else
#include "jpeg_nbits_table.h"
@ -65,31 +71,42 @@
* but must not be updated permanently until we complete the MCU.
*/
typedef struct {
size_t put_buffer; /* current bit-accumulation buffer */
int put_bits; /* # of bits now in it */
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
#if defined(__x86_64__) && defined(__ILP32__)
typedef unsigned long long bit_buf_type;
#else
typedef size_t bit_buf_type;
#endif
/* This macro is to work around compilers with missing or broken
* structure assignment. You'll need to fix this code if you have
* such a compiler and you change MAX_COMPS_IN_SCAN.
/* NOTE: The more optimal Huffman encoding algorithm is only used by the
* intrinsics implementation of the Arm Neon SIMD extensions, which is why we
* retain the old Huffman encoder behavior when using the GAS implementation.
*/
#ifndef NO_STRUCT_ASSIGN
#define ASSIGN_STATE(dest, src) ((dest) = (src))
#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \
defined(_M_ARM) || defined(_M_ARM64))
typedef unsigned long long simd_bit_buf_type;
#else
#if MAX_COMPS_IN_SCAN == 4
#define ASSIGN_STATE(dest, src) \
((dest).put_buffer = (src).put_buffer, \
(dest).put_bits = (src).put_bits, \
(dest).last_dc_val[0] = (src).last_dc_val[0], \
(dest).last_dc_val[1] = (src).last_dc_val[1], \
(dest).last_dc_val[2] = (src).last_dc_val[2], \
(dest).last_dc_val[3] = (src).last_dc_val[3])
typedef bit_buf_type simd_bit_buf_type;
#endif
#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \
(defined(__x86_64__) && defined(__ILP32__))
#define BIT_BUF_SIZE 64
#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32)
#define BIT_BUF_SIZE 32
#else
#error Cannot determine word size
#endif
#define SIMD_BIT_BUF_SIZE (sizeof(simd_bit_buf_type) * 8)
typedef struct {
union {
bit_buf_type c;
simd_bit_buf_type simd;
} put_buffer; /* current bit accumulation buffer */
int free_bits; /* # of bits available in it */
/* (Neon GAS: # of bits now in it) */
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
typedef struct {
struct jpeg_entropy_encoder pub; /* public fields */
@ -123,6 +140,7 @@ typedef struct {
size_t free_in_buffer; /* # of byte spaces remaining in buffer */
savable_state cur; /* Current bit buffer & DC state */
j_compress_ptr cinfo; /* dump_buffer needs access to this */
int simd;
} working_state;
@ -201,8 +219,17 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
}
/* Initialize bit buffer to empty */
entropy->saved.put_buffer = 0;
entropy->saved.put_bits = 0;
if (entropy->simd) {
entropy->saved.put_buffer.simd = 0;
#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
entropy->saved.free_bits = 0;
#else
entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;
#endif
} else {
entropy->saved.put_buffer.c = 0;
entropy->saved.free_bits = BIT_BUF_SIZE;
}
/* Initialize restart stuff */
entropy->restarts_to_go = cinfo->restart_interval;
@ -287,6 +314,7 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
* this lets us detect duplicate VAL entries here, and later
* allows emit_bits to detect any attempt to emit such symbols.
*/
MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco));
MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
/* This is also a convenient place to check for out-of-range
@ -334,94 +362,94 @@ dump_buffer(working_state *state)
/* Outputting bits to the file */
/* These macros perform the same task as the emit_bits() function in the
* original libjpeg code. In addition to reducing overhead by explicitly
* inlining the code, additional performance is achieved by taking into
* account the size of the bit buffer and waiting until it is almost full
* before emptying it. This mostly benefits 64-bit platforms, since 6
* bytes can be stored in a 64-bit bit buffer before it has to be emptied.
/* Output byte b and, speculatively, an additional 0 byte. 0xFF must be
* encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the
* byte is 0xFF. Otherwise, the output buffer pointer is advanced by 1, and
* the speculative 0 byte will be overwritten by the next byte.
*/
#define EMIT_BYTE() { \
JOCTET c; \
put_bits -= 8; \
c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
*buffer++ = c; \
if (c == 0xFF) /* need to stuff a zero byte? */ \
*buffer++ = 0; \
#define EMIT_BYTE(b) { \
buffer[0] = (JOCTET)(b); \
buffer[1] = 0; \
buffer -= -2 + ((JOCTET)(b) < 0xFF); \
}
#define PUT_BITS(code, size) { \
put_bits += size; \
put_buffer = (put_buffer << size) | code; \
}
#if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
#define CHECKBUF15() { \
if (put_bits > 15) { \
EMIT_BYTE() \
EMIT_BYTE() \
/* Output the entire bit buffer. If there are no 0xFF bytes in it, then write
* directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to
* encode 0xFF as 0xFF 0x00.
*/
#if BIT_BUF_SIZE == 64
#define FLUSH() { \
if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \
EMIT_BYTE(put_buffer >> 56) \
EMIT_BYTE(put_buffer >> 48) \
EMIT_BYTE(put_buffer >> 40) \
EMIT_BYTE(put_buffer >> 32) \
EMIT_BYTE(put_buffer >> 24) \
EMIT_BYTE(put_buffer >> 16) \
EMIT_BYTE(put_buffer >> 8) \
EMIT_BYTE(put_buffer ) \
} else { \
buffer[0] = (JOCTET)(put_buffer >> 56); \
buffer[1] = (JOCTET)(put_buffer >> 48); \
buffer[2] = (JOCTET)(put_buffer >> 40); \
buffer[3] = (JOCTET)(put_buffer >> 32); \
buffer[4] = (JOCTET)(put_buffer >> 24); \
buffer[5] = (JOCTET)(put_buffer >> 16); \
buffer[6] = (JOCTET)(put_buffer >> 8); \
buffer[7] = (JOCTET)(put_buffer); \
buffer += 8; \
} \
}
#endif
#define CHECKBUF31() { \
if (put_bits > 31) { \
EMIT_BYTE() \
EMIT_BYTE() \
EMIT_BYTE() \
EMIT_BYTE() \
} \
}
#else
#define CHECKBUF47() { \
if (put_bits > 47) { \
EMIT_BYTE() \
EMIT_BYTE() \
EMIT_BYTE() \
EMIT_BYTE() \
EMIT_BYTE() \
EMIT_BYTE() \
#define FLUSH() { \
if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \
EMIT_BYTE(put_buffer >> 24) \
EMIT_BYTE(put_buffer >> 16) \
EMIT_BYTE(put_buffer >> 8) \
EMIT_BYTE(put_buffer ) \
} else { \
buffer[0] = (JOCTET)(put_buffer >> 24); \
buffer[1] = (JOCTET)(put_buffer >> 16); \
buffer[2] = (JOCTET)(put_buffer >> 8); \
buffer[3] = (JOCTET)(put_buffer); \
buffer += 4; \
} \
}
#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T)
#error Cannot determine word size
#endif
#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
#define EMIT_BITS(code, size) { \
CHECKBUF47() \
PUT_BITS(code, size) \
}
#define EMIT_CODE(code, size) { \
temp2 &= (((JLONG)1) << nbits) - 1; \
CHECKBUF31() \
PUT_BITS(code, size) \
PUT_BITS(temp2, nbits) \
/* Fill the bit buffer to capacity with the leading bits from code, then output
* the bit buffer and put the remaining bits from code into the bit buffer.
*/
#define PUT_AND_FLUSH(code, size) { \
put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \
FLUSH() \
free_bits += BIT_BUF_SIZE; \
put_buffer = code; \
}
#else
#define EMIT_BITS(code, size) { \
PUT_BITS(code, size) \
CHECKBUF15() \
/* Insert code into the bit buffer and output the bit buffer if needed.
* NOTE: We can't flush with free_bits == 0, since the left shift in
* PUT_AND_FLUSH() would have undefined behavior.
*/
#define PUT_BITS(code, size) { \
free_bits -= size; \
if (free_bits < 0) \
PUT_AND_FLUSH(code, size) \
else \
put_buffer = (put_buffer << size) | code; \
}
#define EMIT_CODE(code, size) { \
temp2 &= (((JLONG)1) << nbits) - 1; \
PUT_BITS(code, size) \
CHECKBUF15() \
PUT_BITS(temp2, nbits) \
CHECKBUF15() \
#define PUT_CODE(code, size) { \
temp &= (((JLONG)1) << nbits) - 1; \
temp |= code << nbits; \
nbits += size; \
PUT_BITS(temp, nbits) \
}
#endif
/* Although it is exceedingly rare, it is possible for a Huffman-encoded
* coefficient block to be larger than the 128-byte unencoded block. For each
@ -444,6 +472,7 @@ dump_buffer(working_state *state)
#define STORE_BUFFER() { \
if (localbuf) { \
size_t bytes, bytestocopy; \
bytes = buffer - _buffer; \
buffer = _buffer; \
while (bytes > 0) { \
@ -466,20 +495,46 @@ dump_buffer(working_state *state)
LOCAL(boolean)
flush_bits(working_state *state)
{
JOCTET _buffer[BUFSIZE], *buffer;
size_t put_buffer; int put_bits;
size_t bytes, bytestocopy; int localbuf = 0;
JOCTET _buffer[BUFSIZE], *buffer, temp;
simd_bit_buf_type put_buffer; int put_bits;
int localbuf = 0;
if (state->simd) {
#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
put_bits = state->cur.free_bits;
#else
put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;
#endif
put_buffer = state->cur.put_buffer.simd;
} else {
put_bits = BIT_BUF_SIZE - state->cur.free_bits;
put_buffer = state->cur.put_buffer.c;
}
put_buffer = state->cur.put_buffer;
put_bits = state->cur.put_bits;
LOAD_BUFFER()
/* fill any partial byte with ones */
PUT_BITS(0x7F, 7)
while (put_bits >= 8) EMIT_BYTE()
while (put_bits >= 8) {
put_bits -= 8;
temp = (JOCTET)(put_buffer >> put_bits);
EMIT_BYTE(temp)
}
if (put_bits) {
/* fill partial byte with ones */
temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));
EMIT_BYTE(temp)
}
state->cur.put_buffer = 0; /* and reset bit-buffer to empty */
state->cur.put_bits = 0;
if (state->simd) { /* and reset bit buffer to empty */
state->cur.put_buffer.simd = 0;
#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
state->cur.free_bits = 0;
#else
state->cur.free_bits = SIMD_BIT_BUF_SIZE;
#endif
} else {
state->cur.put_buffer.c = 0;
state->cur.free_bits = BIT_BUF_SIZE;
}
STORE_BUFFER()
return TRUE;
@ -493,7 +548,7 @@ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
c_derived_tbl *dctbl, c_derived_tbl *actbl)
{
JOCTET _buffer[BUFSIZE], *buffer;
size_t bytes, bytestocopy; int localbuf = 0;
int localbuf = 0;
LOAD_BUFFER()
@ -509,53 +564,41 @@ LOCAL(boolean)
encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
c_derived_tbl *dctbl, c_derived_tbl *actbl)
{
int temp, temp2, temp3;
int nbits;
int r, code, size;
int temp, nbits, free_bits;
bit_buf_type put_buffer;
JOCTET _buffer[BUFSIZE], *buffer;
size_t put_buffer; int put_bits;
int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
size_t bytes, bytestocopy; int localbuf = 0;
int localbuf = 0;
put_buffer = state->cur.put_buffer;
put_bits = state->cur.put_bits;
free_bits = state->cur.free_bits;
put_buffer = state->cur.put_buffer.c;
LOAD_BUFFER()
/* Encode the DC coefficient difference per section F.1.2.1 */
temp = temp2 = block[0] - last_dc_val;
temp = block[0] - last_dc_val;
/* This is a well-known technique for obtaining the absolute value without a
* branch. It is derived from an assembly language technique presented in
* "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
* Agner Fog.
* Agner Fog. This code assumes we are on a two's complement machine.
*/
temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
temp ^= temp3;
temp -= temp3;
/* For a negative input, want temp2 = bitwise complement of abs(input) */
/* This code assumes we are on a two's complement machine */
temp2 += temp3;
nbits = temp >> (CHAR_BIT * sizeof(int) - 1);
temp += nbits;
nbits ^= temp;
/* Find the number of bits needed for the magnitude of the coefficient */
nbits = JPEG_NBITS(temp);
/* Emit the Huffman-coded symbol for the number of bits */
code = dctbl->ehufco[nbits];
size = dctbl->ehufsi[nbits];
EMIT_BITS(code, size)
nbits = JPEG_NBITS(nbits);
/* Mask off any extra bits in code */
temp2 &= (((JLONG)1) << nbits) - 1;
/* Emit that number of bits of the value, if positive, */
/* or the complement of its magnitude, if negative. */
EMIT_BITS(temp2, nbits)
/* Emit the Huffman-coded symbol for the number of bits.
* Emit that number of bits of the value, if positive,
* or the complement of its magnitude, if negative.
*/
PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])
/* Encode the AC coefficients per section F.1.2.2 */
r = 0; /* r = run length of zeros */
{
int r = 0; /* r = run length of zeros */
/* Manually unroll the k loop to eliminate the counter variable. This
* improves performance greatly on systems with a limited number of
@ -563,51 +606,46 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
*/
#define kloop(jpeg_natural_order_of_k) { \
if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
r++; \
r += 16; \
} else { \
temp2 = temp; \
/* Branch-less absolute value, bitwise complement, etc., same as above */ \
temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
temp ^= temp3; \
temp -= temp3; \
temp2 += temp3; \
nbits = JPEG_NBITS_NONZERO(temp); \
nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \
temp += nbits; \
nbits ^= temp; \
nbits = JPEG_NBITS_NONZERO(nbits); \
/* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
while (r > 15) { \
EMIT_BITS(code_0xf0, size_0xf0) \
r -= 16; \
while (r >= 16 * 16) { \
r -= 16 * 16; \
PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \
} \
/* Emit Huffman symbol for run length / number of bits */ \
temp3 = (r << 4) + nbits; \
code = actbl->ehufco[temp3]; \
size = actbl->ehufsi[temp3]; \
EMIT_CODE(code, size) \
r += nbits; \
PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \
r = 0; \
} \
}
/* One iteration for each value in jpeg_natural_order[] */
kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);
kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);
kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);
kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);
kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);
kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);
kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);
kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);
kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);
kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);
kloop(55); kloop(62); kloop(63);
/* If the last coef(s) were zero, emit an end-of-block code */
if (r > 0) {
code = actbl->ehufco[0];
size = actbl->ehufsi[0];
EMIT_BITS(code, size)
/* One iteration for each value in jpeg_natural_order[] */
kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3);
kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18);
kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26);
kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27);
kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21);
kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57);
kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15);
kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58);
kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39);
kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47);
kloop(55); kloop(62); kloop(63);
/* If the last coef(s) were zero, emit an end-of-block code */
if (r > 0) {
PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])
}
}
state->cur.put_buffer = put_buffer;
state->cur.put_bits = put_bits;
state->cur.put_buffer.c = put_buffer;
state->cur.free_bits = free_bits;
STORE_BUFFER()
return TRUE;
@ -654,8 +692,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
state.next_output_byte = cinfo->dest->next_output_byte;
state.free_in_buffer = cinfo->dest->free_in_buffer;
ASSIGN_STATE(state.cur, entropy->saved);
state.cur = entropy->saved;
state.cinfo = cinfo;
state.simd = entropy->simd;
/* Emit restart marker if needed */
if (cinfo->restart_interval) {
@ -694,7 +733,7 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
/* Completed MCU, so update state */
cinfo->dest->next_output_byte = state.next_output_byte;
cinfo->dest->free_in_buffer = state.free_in_buffer;
ASSIGN_STATE(entropy->saved, state.cur);
entropy->saved = state.cur;
/* Update restart-interval state too */
if (cinfo->restart_interval) {
@ -723,8 +762,9 @@ finish_pass_huff(j_compress_ptr cinfo)
/* Load up working state ... flush_bits needs it */
state.next_output_byte = cinfo->dest->next_output_byte;
state.free_in_buffer = cinfo->dest->free_in_buffer;
ASSIGN_STATE(state.cur, entropy->saved);
state.cur = entropy->saved;
state.cinfo = cinfo;
state.simd = entropy->simd;
/* Flush out the last data */
if (!flush_bits(&state))
@ -733,7 +773,7 @@ finish_pass_huff(j_compress_ptr cinfo)
/* Update state */
cinfo->dest->next_output_byte = state.next_output_byte;
cinfo->dest->free_in_buffer = state.free_in_buffer;
ASSIGN_STATE(entropy->saved, state.cur);
entropy->saved = state.cur;
}

@ -4,8 +4,9 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1995-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2011, 2015, 2018, D. R. Commander.
* Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
* Copyright (C) 2016, 2018, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -51,15 +52,19 @@
* flags (this defines __thumb__).
*/
/* NOTE: Both GCC and Clang define __GNUC__ */
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif
#ifdef USE_CLZ_INTRINSIC
#if defined(_MSC_VER) && !defined(__clang__)
#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
#else
#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
#endif
#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
#else
#include "jpeg_nbits_table.h"
@ -169,24 +174,26 @@ INLINE
METHODDEF(int)
count_zeroes(size_t *x)
{
int result;
#if defined(HAVE_BUILTIN_CTZL)
int result;
result = __builtin_ctzl(*x);
*x >>= result;
#elif defined(HAVE_BITSCANFORWARD64)
unsigned long result;
_BitScanForward64(&result, *x);
*x >>= result;
#elif defined(HAVE_BITSCANFORWARD)
unsigned long result;
_BitScanForward(&result, *x);
*x >>= result;
#else
result = 0;
int result = 0;
while ((*x & 1) == 0) {
++result;
*x >>= 1;
}
#endif
return result;
return (int)result;
}
@ -860,7 +867,7 @@ encode_mcu_AC_refine_prepare(const JCOEF *block,
#define ENCODE_COEFS_AC_REFINE(label) { \
while (zerobits) { \
int idx = count_zeroes(&zerobits); \
idx = count_zeroes(&zerobits); \
r += idx; \
cabsvalue += idx; \
signbits >>= idx; \
@ -917,7 +924,7 @@ METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
register int temp, r;
register int temp, r, idx;
char *BR_buffer;
unsigned int BR;
int Sl = cinfo->Se - cinfo->Ss + 1;
@ -968,7 +975,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
if (zerobits) {
int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
int idx = count_zeroes(&zerobits);
idx = count_zeroes(&zerobits);
signbits >>= idx;
idx += diff;
r += idx;

@ -6,7 +6,7 @@
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, D. R. Commander.
* Copyright (C) 2015, 2019, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -103,7 +103,7 @@ expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
if (numcols > 0) {
for (row = 0; row < num_rows; row++) {
ptr = image_data[row] + input_cols;
pixval = ptr[-1]; /* don't need GETJSAMPLE() here */
pixval = ptr[-1];
for (count = numcols; count > 0; count--)
*ptr++ = pixval;
}
@ -174,7 +174,7 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
for (v = 0; v < v_expand; v++) {
inptr = input_data[inrow + v] + outcol_h;
for (h = 0; h < h_expand; h++) {
outvalue += (JLONG)GETJSAMPLE(*inptr++);
outvalue += (JLONG)(*inptr++);
}
}
*outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix);
@ -237,8 +237,7 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
inptr = input_data[outrow];
bias = 0; /* bias = 0,1,0,1,... for successive samples */
for (outcol = 0; outcol < output_cols; outcol++) {
*outptr++ =
(JSAMPLE)((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) + bias) >> 1);
*outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
bias ^= 1; /* 0=>1, 1=>0 */
inptr += 2;
}
@ -277,8 +276,7 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
bias = 1; /* bias = 1,2,1,2,... for successive samples */
for (outcol = 0; outcol < output_cols; outcol++) {
*outptr++ =
(JSAMPLE)((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + bias) >> 2);
(JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
bias ^= 3; /* 1=>2, 2=>1 */
inptr0 += 2; inptr1 += 2;
}
@ -337,33 +335,25 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
below_ptr = input_data[inrow + 2];
/* Special case for first column: pretend column -1 is same as column 0 */
membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2];
neighsum += neighsum;
neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2];
membersum = membersum * memberscale + neighsum * neighscale;
*outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
for (colctr = output_cols - 2; colctr > 0; colctr--) {
/* sum of pixels directly mapped to this output element */
membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
/* sum of edge-neighbor pixels */
neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2];
/* The edge-neighbors count twice as much as corner-neighbors */
neighsum += neighsum;
/* Add in the corner-neighbors */
neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2];
/* form final output scaled up by 2^16 */
membersum = membersum * memberscale + neighsum * neighscale;
/* round, descale and output it */
@ -372,15 +362,11 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
}
/* Special case for last column */
membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1];
neighsum += neighsum;
neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1];
membersum = membersum * memberscale + neighsum * neighscale;
*outptr = (JSAMPLE)((membersum + 32768) >> 16);
@ -429,21 +415,18 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
below_ptr = input_data[outrow + 1];
/* Special case for first column */
colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
GETJSAMPLE(*inptr);
membersum = GETJSAMPLE(*inptr++);
nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
GETJSAMPLE(*inptr);
colsum = (*above_ptr++) + (*below_ptr++) + inptr[0];
membersum = *inptr++;
nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
neighsum = colsum + (colsum - membersum) + nextcolsum;
membersum = membersum * memberscale + neighsum * neighscale;
*outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
lastcolsum = colsum; colsum = nextcolsum;
for (colctr = output_cols - 2; colctr > 0; colctr--) {
membersum = GETJSAMPLE(*inptr++);
membersum = *inptr++;
above_ptr++; below_ptr++;
nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
GETJSAMPLE(*inptr);
nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
membersum = membersum * memberscale + neighsum * neighscale;
*outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
@ -451,7 +434,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
}
/* Special case for last column */
membersum = GETJSAMPLE(*inptr);
membersum = *inptr;
neighsum = lastcolsum + (colsum - membersum) + colsum;
membersum = membersum * memberscale + neighsum * neighscale;
*outptr = (JSAMPLE)((membersum + 32768) >> 16);

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1994-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2010, 2015-2018, 2020, D. R. Commander.
* Copyright (C) 2010, 2015-2020, D. R. Commander.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@ -319,6 +319,8 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
{
JDIMENSION n;
my_master_ptr master = (my_master_ptr)cinfo->master;
JSAMPLE dummy_sample[1] = { 0 };
JSAMPROW dummy_row = dummy_sample;
JSAMPARRAY scanlines = NULL;
void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION input_row, JSAMPARRAY output_buf,
@ -329,6 +331,10 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
if (cinfo->cconvert && cinfo->cconvert->color_convert) {
color_convert = cinfo->cconvert->color_convert;
cinfo->cconvert->color_convert = noop_convert;
/* This just prevents UBSan from complaining about adding 0 to a NULL
* pointer. The pointer isn't actually used.
*/
scanlines = &dummy_row;
}
if (cinfo->cquantize && cinfo->cquantize->color_quantize) {
@ -532,6 +538,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
* decoded coefficients. This is ~5% faster for large subsets, but
* it's tough to tell a difference for smaller images.
*/
if (!cinfo->entropy->insufficient_data)
cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
(*cinfo->entropy->decode_mcu) (cinfo, NULL);
}
}

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Developed 1997-2015 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2015-2018, D. R. Commander.
* Copyright (C) 2015-2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -80,7 +80,7 @@ get_byte(j_decompress_ptr cinfo)
if (!(*src->fill_input_buffer) (cinfo))
ERREXIT(cinfo, JERR_CANT_SUSPEND);
src->bytes_in_buffer--;
return GETJOCTET(*src->next_input_byte++);
return *src->next_input_byte++;
}
@ -665,8 +665,16 @@ bad:
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
int *coef_bit_ptr = &cinfo->coef_bits[cindex][0];
int *prev_coef_bit_ptr =
&cinfo->coef_bits[cindex + cinfo->num_components][0];
if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
if (cinfo->input_scan_number > 1)
prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
else
prev_coef_bit_ptr[coefi] = 0;
}
for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
if (cinfo->Ah != expected)
@ -727,6 +735,7 @@ bad:
entropy->c = 0;
entropy->a = 0;
entropy->ct = -16; /* force reading 2 initial bytes to fill C */
entropy->pub.insufficient_data = FALSE;
/* Initialize restart counter */
entropy->restarts_to_go = cinfo->restart_interval;
@ -763,7 +772,7 @@ jinit_arith_decoder(j_decompress_ptr cinfo)
int *coef_bit_ptr, ci;
cinfo->coef_bits = (int (*)[DCTSIZE2])
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
cinfo->num_components * DCTSIZE2 *
cinfo->num_components * 2 * DCTSIZE2 *
sizeof(int));
coef_bit_ptr = &cinfo->coef_bits[0][0];
for (ci = 0; ci < cinfo->num_components; ci++)

@ -5,7 +5,7 @@
* Copyright (C) 1994-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2010, 2015-2016, D. R. Commander.
* Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander.
* Copyright (C) 2015, 2020, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@ -102,6 +102,8 @@ decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
/* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */
jzero_far((void *)coef->MCU_buffer[0],
(size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK)));
if (!cinfo->entropy->insufficient_data)
cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
/* Suspension forced; update state counters and exit */
coef->MCU_vert_offset = yoffset;
@ -227,6 +229,8 @@ consume_data(j_decompress_ptr cinfo)
}
}
}
if (!cinfo->entropy->insufficient_data)
cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
/* Try to fetch the MCU. */
if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
/* Suspension forced; update state counters and exit */
@ -326,19 +330,22 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
#ifdef BLOCK_SMOOTHING_SUPPORTED
/*
* This code applies interblock smoothing as described by section K.8
* of the JPEG standard: the first 5 AC coefficients are estimated from
* the DC values of a DCT block and its 8 neighboring blocks.
* This code applies interblock smoothing; the first 9 AC coefficients are
* estimated from the DC values of a DCT block and its 24 neighboring blocks.
* We apply smoothing only for progressive JPEG decoding, and only if
* the coefficients it can estimate are not yet known to full precision.
*/
/* Natural-order array positions of the first 5 zigzag-order coefficients */
/* Natural-order array positions of the first 9 zigzag-order coefficients */
#define Q01_POS 1
#define Q10_POS 8
#define Q20_POS 16
#define Q11_POS 9
#define Q02_POS 2
#define Q03_POS 3
#define Q12_POS 10
#define Q21_POS 17
#define Q30_POS 24
/*
* Determine whether block smoothing is applicable and safe.
@ -356,8 +363,8 @@ smoothing_ok(j_decompress_ptr cinfo)
int ci, coefi;
jpeg_component_info *compptr;
JQUANT_TBL *qtable;
int *coef_bits;
int *coef_bits_latch;
int *coef_bits, *prev_coef_bits;
int *coef_bits_latch, *prev_coef_bits_latch;
if (!cinfo->progressive_mode || cinfo->coef_bits == NULL)
return FALSE;
@ -366,34 +373,47 @@ smoothing_ok(j_decompress_ptr cinfo)
if (coef->coef_bits_latch == NULL)
coef->coef_bits_latch = (int *)
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
cinfo->num_components *
cinfo->num_components * 2 *
(SAVED_COEFS * sizeof(int)));
coef_bits_latch = coef->coef_bits_latch;
prev_coef_bits_latch =
&coef->coef_bits_latch[cinfo->num_components * SAVED_COEFS];
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
ci++, compptr++) {
/* All components' quantization values must already be latched. */
if ((qtable = compptr->quant_table) == NULL)
return FALSE;
/* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
/* Verify DC & first 9 AC quantizers are nonzero to avoid zero-divide. */
if (qtable->quantval[0] == 0 ||
qtable->quantval[Q01_POS] == 0 ||
qtable->quantval[Q10_POS] == 0 ||
qtable->quantval[Q20_POS] == 0 ||
qtable->quantval[Q11_POS] == 0 ||
qtable->quantval[Q02_POS] == 0)
qtable->quantval[Q02_POS] == 0 ||
qtable->quantval[Q03_POS] == 0 ||
qtable->quantval[Q12_POS] == 0 ||
qtable->quantval[Q21_POS] == 0 ||
qtable->quantval[Q30_POS] == 0)
return FALSE;
/* DC values must be at least partly known for all components. */
coef_bits = cinfo->coef_bits[ci];
prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
if (coef_bits[0] < 0)
return FALSE;
coef_bits_latch[0] = coef_bits[0];
/* Block smoothing is helpful if some AC coefficients remain inaccurate. */
for (coefi = 1; coefi <= 5; coefi++) {
for (coefi = 1; coefi < SAVED_COEFS; coefi++) {
if (cinfo->input_scan_number > 1)
prev_coef_bits_latch[coefi] = prev_coef_bits[coefi];
else
prev_coef_bits_latch[coefi] = -1;
coef_bits_latch[coefi] = coef_bits[coefi];
if (coef_bits[coefi] != 0)
smoothing_useful = TRUE;
}
coef_bits_latch += SAVED_COEFS;
prev_coef_bits_latch += SAVED_COEFS;
}
return smoothing_useful;
@ -412,17 +432,20 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
JDIMENSION block_num, last_block_column;
int ci, block_row, block_rows, access_rows;
JBLOCKARRAY buffer;
JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row;
JBLOCKROW next_block_row, next_next_block_row;
JSAMPARRAY output_ptr;
JDIMENSION output_col;
jpeg_component_info *compptr;
inverse_DCT_method_ptr inverse_DCT;
boolean first_row, last_row;
boolean change_dc;
JCOEF *workspace;
int *coef_bits;
JQUANT_TBL *quanttbl;
JLONG Q00, Q01, Q02, Q10, Q11, Q20, num;
int DC1, DC2, DC3, DC4, DC5, DC6, DC7, DC8, DC9;
JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num;
int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12,
DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24,
DC25;
int Al, pred;
/* Keep a local variable to avoid looking it up more than once */
@ -434,10 +457,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
if (cinfo->input_scan_number == cinfo->output_scan_number) {
/* If input is working on current scan, we ordinarily want it to
* have completed the current row. But if input scan is DC,
* we want it to keep one row ahead so that next block row's DC
* we want it to keep two rows ahead so that next two block rows' DC
* values are up to date.
*/
JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
JDIMENSION delta = (cinfo->Ss == 0) ? 2 : 0;
if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta)
break;
}
@ -452,34 +475,53 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
if (!compptr->component_needed)
continue;
/* Count non-dummy DCT block rows in this iMCU row. */
if (cinfo->output_iMCU_row < last_iMCU_row) {
if (cinfo->output_iMCU_row < last_iMCU_row - 1) {
block_rows = compptr->v_samp_factor;
access_rows = block_rows * 3; /* this and next two iMCU rows */
} else if (cinfo->output_iMCU_row < last_iMCU_row) {
block_rows = compptr->v_samp_factor;
access_rows = block_rows * 2; /* this and next iMCU row */
last_row = FALSE;
} else {
/* NB: can't use last_row_height here; it is input-side-dependent! */
block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
if (block_rows == 0) block_rows = compptr->v_samp_factor;
access_rows = block_rows; /* this iMCU row only */
last_row = TRUE;
}
/* Align the virtual buffer for this component. */
if (cinfo->output_iMCU_row > 0) {
access_rows += compptr->v_samp_factor; /* prior iMCU row too */
if (cinfo->output_iMCU_row > 1) {
access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */
buffer = (*cinfo->mem->access_virt_barray)
((j_common_ptr)cinfo, coef->whole_image[ci],
(cinfo->output_iMCU_row - 2) * compptr->v_samp_factor,
(JDIMENSION)access_rows, FALSE);
buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */
} else if (cinfo->output_iMCU_row > 0) {
buffer = (*cinfo->mem->access_virt_barray)
((j_common_ptr)cinfo, coef->whole_image[ci],
(cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
(JDIMENSION)access_rows, FALSE);
buffer += compptr->v_samp_factor; /* point to current iMCU row */
first_row = FALSE;
} else {
buffer = (*cinfo->mem->access_virt_barray)
((j_common_ptr)cinfo, coef->whole_image[ci],
(JDIMENSION)0, (JDIMENSION)access_rows, FALSE);
first_row = TRUE;
}
/* Fetch component-dependent info */
coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
/* Fetch component-dependent info.
* If the current scan is incomplete, then we use the component-dependent
* info from the previous scan.
*/
if (cinfo->output_iMCU_row > cinfo->master->last_good_iMCU_row)
coef_bits =
coef->coef_bits_latch + ((ci + cinfo->num_components) * SAVED_COEFS);
else
coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
/* We only do DC interpolation if no AC coefficient data is available. */
change_dc =
coef_bits[1] == -1 && coef_bits[2] == -1 && coef_bits[3] == -1 &&
coef_bits[4] == -1 && coef_bits[5] == -1 && coef_bits[6] == -1 &&
coef_bits[7] == -1 && coef_bits[8] == -1 && coef_bits[9] == -1;
quanttbl = compptr->quant_table;
Q00 = quanttbl->quantval[0];
Q01 = quanttbl->quantval[Q01_POS];
@ -487,27 +529,51 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
Q20 = quanttbl->quantval[Q20_POS];
Q11 = quanttbl->quantval[Q11_POS];
Q02 = quanttbl->quantval[Q02_POS];
if (change_dc) {
Q03 = quanttbl->quantval[Q03_POS];
Q12 = quanttbl->quantval[Q12_POS];
Q21 = quanttbl->quantval[Q21_POS];
Q30 = quanttbl->quantval[Q30_POS];
}
inverse_DCT = cinfo->idct->inverse_DCT[ci];
output_ptr = output_buf[ci];
/* Loop over all DCT blocks to be processed. */
for (block_row = 0; block_row < block_rows; block_row++) {
buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
if (first_row && block_row == 0)
if (block_row > 0 || cinfo->output_iMCU_row > 0)
prev_block_row =
buffer[block_row - 1] + cinfo->master->first_MCU_col[ci];
else
prev_block_row = buffer_ptr;
if (block_row > 1 || cinfo->output_iMCU_row > 1)
prev_prev_block_row =
buffer[block_row - 2] + cinfo->master->first_MCU_col[ci];
else
prev_prev_block_row = prev_block_row;
if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row)
next_block_row =
buffer[block_row + 1] + cinfo->master->first_MCU_col[ci];
else
prev_block_row = buffer[block_row - 1] +
cinfo->master->first_MCU_col[ci];
if (last_row && block_row == block_rows - 1)
next_block_row = buffer_ptr;
if (block_row < block_rows - 2 ||
cinfo->output_iMCU_row < last_iMCU_row - 1)
next_next_block_row =
buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
else
next_block_row = buffer[block_row + 1] +
cinfo->master->first_MCU_col[ci];
next_next_block_row = next_block_row;
/* We fetch the surrounding DC values using a sliding-register approach.
* Initialize all nine here so as to do the right thing on narrow pics.
* Initialize all 25 here so as to do the right thing on narrow pics.
*/
DC1 = DC2 = DC3 = (int)prev_block_row[0][0];
DC4 = DC5 = DC6 = (int)buffer_ptr[0][0];
DC7 = DC8 = DC9 = (int)next_block_row[0][0];
DC01 = DC02 = DC03 = DC04 = DC05 = (int)prev_prev_block_row[0][0];
DC06 = DC07 = DC08 = DC09 = DC10 = (int)prev_block_row[0][0];
DC11 = DC12 = DC13 = DC14 = DC15 = (int)buffer_ptr[0][0];
DC16 = DC17 = DC18 = DC19 = DC20 = (int)next_block_row[0][0];
DC21 = DC22 = DC23 = DC24 = DC25 = (int)next_next_block_row[0][0];
output_col = 0;
last_block_column = compptr->width_in_blocks - 1;
for (block_num = cinfo->master->first_MCU_col[ci];
@ -515,18 +581,39 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
/* Fetch current DCT block into workspace so we can modify it. */
jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1);
/* Update DC values */
if (block_num < last_block_column) {
DC3 = (int)prev_block_row[1][0];
DC6 = (int)buffer_ptr[1][0];
DC9 = (int)next_block_row[1][0];
if (block_num == cinfo->master->first_MCU_col[ci] &&
block_num < last_block_column) {
DC04 = (int)prev_prev_block_row[1][0];
DC09 = (int)prev_block_row[1][0];
DC14 = (int)buffer_ptr[1][0];
DC19 = (int)next_block_row[1][0];
DC24 = (int)next_next_block_row[1][0];
}
/* Compute coefficient estimates per K.8.
* An estimate is applied only if coefficient is still zero,
* and is not known to be fully accurate.
if (block_num + 1 < last_block_column) {
DC05 = (int)prev_prev_block_row[2][0];
DC10 = (int)prev_block_row[2][0];
DC15 = (int)buffer_ptr[2][0];
DC20 = (int)next_block_row[2][0];
DC25 = (int)next_next_block_row[2][0];
}
/* If DC interpolation is enabled, compute coefficient estimates using
* a Gaussian-like kernel, keeping the averages of the DC values.
*
* If DC interpolation is disabled, compute coefficient estimates using
* an algorithm similar to the one described in Section K.8 of the JPEG
* standard, except applied to a 5x5 window rather than a 3x3 window.
*
* An estimate is applied only if the coefficient is still zero and is
* not known to be fully accurate.
*/
/* AC01 */
if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) {
num = 36 * Q00 * (DC4 - DC6);
num = Q00 * (change_dc ?
(-DC01 - DC02 + DC04 + DC05 - 3 * DC06 + 13 * DC07 -
13 * DC09 + 3 * DC10 - 3 * DC11 + 38 * DC12 - 38 * DC14 +
3 * DC15 - 3 * DC16 + 13 * DC17 - 13 * DC19 + 3 * DC20 -
DC21 - DC22 + DC24 + DC25) :
(-7 * DC11 + 50 * DC12 - 50 * DC14 + 7 * DC15));
if (num >= 0) {
pred = (int)(((Q01 << 7) + num) / (Q01 << 8));
if (Al > 0 && pred >= (1 << Al))
@ -541,7 +628,12 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC10 */
if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) {
num = 36 * Q00 * (DC2 - DC8);
num = Q00 * (change_dc ?
(-DC01 - 3 * DC02 - 3 * DC03 - 3 * DC04 - DC05 - DC06 +
13 * DC07 + 38 * DC08 + 13 * DC09 - DC10 + DC16 -
13 * DC17 - 38 * DC18 - 13 * DC19 + DC20 + DC21 +
3 * DC22 + 3 * DC23 + 3 * DC24 + DC25) :
(-7 * DC03 + 50 * DC08 - 50 * DC18 + 7 * DC23));
if (num >= 0) {
pred = (int)(((Q10 << 7) + num) / (Q10 << 8));
if (Al > 0 && pred >= (1 << Al))
@ -556,7 +648,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC20 */
if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) {
num = 9 * Q00 * (DC2 + DC8 - 2 * DC5);
num = Q00 * (change_dc ?
(DC03 + 2 * DC07 + 7 * DC08 + 2 * DC09 - 5 * DC12 - 14 * DC13 -
5 * DC14 + 2 * DC17 + 7 * DC18 + 2 * DC19 + DC23) :
(-DC03 + 13 * DC08 - 24 * DC13 + 13 * DC18 - DC23));
if (num >= 0) {
pred = (int)(((Q20 << 7) + num) / (Q20 << 8));
if (Al > 0 && pred >= (1 << Al))
@ -571,7 +666,11 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC11 */
if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) {
num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
num = Q00 * (change_dc ?
(-DC01 + DC05 + 9 * DC07 - 9 * DC09 - 9 * DC17 +
9 * DC19 + DC21 - DC25) :
(DC10 + DC16 - 10 * DC17 + 10 * DC19 - DC02 - DC20 + DC22 -
DC24 + DC04 - DC06 + 10 * DC07 - 10 * DC09));
if (num >= 0) {
pred = (int)(((Q11 << 7) + num) / (Q11 << 8));
if (Al > 0 && pred >= (1 << Al))
@ -586,7 +685,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
/* AC02 */
if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) {
num = 9 * Q00 * (DC4 + DC6 - 2 * DC5);
num = Q00 * (change_dc ?
(2 * DC07 - 5 * DC08 + 2 * DC09 + DC11 + 7 * DC12 - 14 * DC13 +
7 * DC14 + DC15 + 2 * DC17 - 5 * DC18 + 2 * DC19) :
(-DC11 + 13 * DC12 - 24 * DC13 + 13 * DC14 - DC15));
if (num >= 0) {
pred = (int)(((Q02 << 7) + num) / (Q02 << 8));
if (Al > 0 && pred >= (1 << Al))
@ -599,14 +701,96 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
}
workspace[2] = (JCOEF)pred;
}
if (change_dc) {
/* AC03 */
if ((Al = coef_bits[6]) != 0 && workspace[3] == 0) {
num = Q00 * (DC07 - DC09 + 2 * DC12 - 2 * DC14 + DC17 - DC19);
if (num >= 0) {
pred = (int)(((Q03 << 7) + num) / (Q03 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
} else {
pred = (int)(((Q03 << 7) - num) / (Q03 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
pred = -pred;
}
workspace[3] = (JCOEF)pred;
}
/* AC12 */
if ((Al = coef_bits[7]) != 0 && workspace[10] == 0) {
num = Q00 * (DC07 - 3 * DC08 + DC09 - DC17 + 3 * DC18 - DC19);
if (num >= 0) {
pred = (int)(((Q12 << 7) + num) / (Q12 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
} else {
pred = (int)(((Q12 << 7) - num) / (Q12 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
pred = -pred;
}
workspace[10] = (JCOEF)pred;
}
/* AC21 */
if ((Al = coef_bits[8]) != 0 && workspace[17] == 0) {
num = Q00 * (DC07 - DC09 - 3 * DC12 + 3 * DC14 + DC17 - DC19);
if (num >= 0) {
pred = (int)(((Q21 << 7) + num) / (Q21 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
} else {
pred = (int)(((Q21 << 7) - num) / (Q21 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
pred = -pred;
}
workspace[17] = (JCOEF)pred;
}
/* AC30 */
if ((Al = coef_bits[9]) != 0 && workspace[24] == 0) {
num = Q00 * (DC07 + 2 * DC08 + DC09 - DC17 - 2 * DC18 - DC19);
if (num >= 0) {
pred = (int)(((Q30 << 7) + num) / (Q30 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
} else {
pred = (int)(((Q30 << 7) - num) / (Q30 << 8));
if (Al > 0 && pred >= (1 << Al))
pred = (1 << Al) - 1;
pred = -pred;
}
workspace[24] = (JCOEF)pred;
}
/* coef_bits[0] is non-negative. Otherwise this function would not
* be called.
*/
num = Q00 *
(-2 * DC01 - 6 * DC02 - 8 * DC03 - 6 * DC04 - 2 * DC05 -
6 * DC06 + 6 * DC07 + 42 * DC08 + 6 * DC09 - 6 * DC10 -
8 * DC11 + 42 * DC12 + 152 * DC13 + 42 * DC14 - 8 * DC15 -
6 * DC16 + 6 * DC17 + 42 * DC18 + 6 * DC19 - 6 * DC20 -
2 * DC21 - 6 * DC22 - 8 * DC23 - 6 * DC24 - 2 * DC25);
if (num >= 0) {
pred = (int)(((Q00 << 7) + num) / (Q00 << 8));
} else {
pred = (int)(((Q00 << 7) - num) / (Q00 << 8));
pred = -pred;
}
workspace[0] = (JCOEF)pred;
} /* change_dc */
/* OK, do the IDCT */
(*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr,
output_col);
/* Advance for next column */
DC1 = DC2; DC2 = DC3;
DC4 = DC5; DC5 = DC6;
DC7 = DC8; DC8 = DC9;
buffer_ptr++, prev_block_row++, next_block_row++;
DC01 = DC02; DC02 = DC03; DC03 = DC04; DC04 = DC05;
DC06 = DC07; DC07 = DC08; DC08 = DC09; DC09 = DC10;
DC11 = DC12; DC12 = DC13; DC13 = DC14; DC14 = DC15;
DC16 = DC17; DC17 = DC18; DC18 = DC19; DC19 = DC20;
DC21 = DC22; DC22 = DC23; DC23 = DC24; DC24 = DC25;
buffer_ptr++, prev_block_row++, next_block_row++,
prev_prev_block_row++, next_next_block_row++;
output_col += compptr->_DCT_scaled_size;
}
output_ptr += compptr->_DCT_scaled_size;
@ -655,7 +839,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
#ifdef BLOCK_SMOOTHING_SUPPORTED
/* If block smoothing could be used, need a bigger window */
if (cinfo->progressive_mode)
access_rows *= 3;
access_rows *= 5;
#endif
coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE,

@ -5,6 +5,7 @@
* Copyright (C) 1994-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2020, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*/
@ -51,7 +52,7 @@ typedef struct {
#ifdef BLOCK_SMOOTHING_SUPPORTED
/* When doing block smoothing, we latch coefficient Al values here */
int *coef_bits_latch;
#define SAVED_COEFS 6 /* we save coef_bits[0..5] */
#define SAVED_COEFS 10 /* we save coef_bits[0..9] */
#endif
} my_coef_controller;

@ -45,9 +45,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
y = GETJSAMPLE(*inptr0++);
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
y = *inptr0++;
cb = *inptr1++;
cr = *inptr2++;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
@ -58,18 +58,18 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
y = GETJSAMPLE(*inptr0++);
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
y = *inptr0++;
cb = *inptr1++;
cr = *inptr2++;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
b = range_limit[y + Cbbtab[cb]];
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr0++);
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
y = *inptr0++;
cb = *inptr1++;
cr = *inptr2++;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
@ -80,9 +80,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr += 4;
}
if (num_cols & 1) {
y = GETJSAMPLE(*inptr0);
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
y = *inptr0;
cb = *inptr1;
cr = *inptr2;
r = range_limit[y + Crrtab[cr]];
g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
SCALEBITS))];
@ -125,9 +125,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
y = GETJSAMPLE(*inptr0++);
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
y = *inptr0++;
cb = *inptr1++;
cr = *inptr2++;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -139,9 +139,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
y = GETJSAMPLE(*inptr0++);
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
y = *inptr0++;
cb = *inptr1++;
cr = *inptr2++;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -150,9 +150,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr0++);
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
y = *inptr0++;
cb = *inptr1++;
cr = *inptr2++;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -165,9 +165,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr += 4;
}
if (num_cols & 1) {
y = GETJSAMPLE(*inptr0);
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
y = *inptr0;
cb = *inptr1;
cr = *inptr2;
r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
g = range_limit[DITHER_565_G(y +
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@ -202,32 +202,32 @@ rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
r = GETJSAMPLE(*inptr0++);
g = GETJSAMPLE(*inptr1++);
b = GETJSAMPLE(*inptr2++);
r = *inptr0++;
g = *inptr1++;
b = *inptr2++;
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
outptr += 2;
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
r = GETJSAMPLE(*inptr0++);
g = GETJSAMPLE(*inptr1++);
b = GETJSAMPLE(*inptr2++);
r = *inptr0++;
g = *inptr1++;
b = *inptr2++;
rgb = PACK_SHORT_565(r, g, b);
r = GETJSAMPLE(*inptr0++);
g = GETJSAMPLE(*inptr1++);
b = GETJSAMPLE(*inptr2++);
r = *inptr0++;
g = *inptr1++;
b = *inptr2++;
rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
outptr += 4;
}
if (num_cols & 1) {
r = GETJSAMPLE(*inptr0);
g = GETJSAMPLE(*inptr1);
b = GETJSAMPLE(*inptr2);
r = *inptr0;
g = *inptr1;
b = *inptr2;
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
}
@ -259,24 +259,24 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
if (PACK_NEED_ALIGNMENT(outptr)) {
r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
r = range_limit[DITHER_565_R(*inptr0++, d0)];
g = range_limit[DITHER_565_G(*inptr1++, d0)];
b = range_limit[DITHER_565_B(*inptr2++, d0)];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
outptr += 2;
num_cols--;
}
for (col = 0; col < (num_cols >> 1); col++) {
r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
r = range_limit[DITHER_565_R(*inptr0++, d0)];
g = range_limit[DITHER_565_G(*inptr1++, d0)];
b = range_limit[DITHER_565_B(*inptr2++, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
r = range_limit[DITHER_565_R(*inptr0++, d0)];
g = range_limit[DITHER_565_G(*inptr1++, d0)];
b = range_limit[DITHER_565_B(*inptr2++, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
@ -284,9 +284,9 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr += 4;
}
if (num_cols & 1) {
r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)];
g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)];
b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)];
r = range_limit[DITHER_565_R(*inptr0, d0)];
g = range_limit[DITHER_565_G(*inptr1, d0)];
b = range_limit[DITHER_565_B(*inptr2, d0)];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr = (INT16)rgb;
}

@ -53,9 +53,9 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
y = GETJSAMPLE(inptr0[col]);
cb = GETJSAMPLE(inptr1[col]);
cr = GETJSAMPLE(inptr2[col]);
y = inptr0[col];
cb = inptr1[col];
cr = inptr2[col];
/* Range-limiting is essential due to noise introduced by DCT losses. */
outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
outptr[RGB_GREEN] = range_limit[y +
@ -93,7 +93,6 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
inptr = input_buf[0][input_row++];
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
/* We can dispense with GETJSAMPLE() here */
outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
/* Set unused byte to 0xFF so it can be interpreted as an opaque */
/* alpha channel value */
@ -128,7 +127,6 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
/* We can dispense with GETJSAMPLE() here */
outptr[RGB_RED] = inptr0[col];
outptr[RGB_GREEN] = inptr1[col];
outptr[RGB_BLUE] = inptr2[col];

@ -341,9 +341,9 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
r = GETJSAMPLE(inptr0[col]);
g = GETJSAMPLE(inptr1[col]);
b = GETJSAMPLE(inptr2[col]);
r = inptr0[col];
g = inptr1[col];
b = inptr2[col];
/* Y */
outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
ctab[b + B_Y_OFF]) >> SCALEBITS);
@ -550,9 +550,9 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
input_row++;
outptr = *output_buf++;
for (col = 0; col < num_cols; col++) {
y = GETJSAMPLE(inptr0[col]);
cb = GETJSAMPLE(inptr1[col]);
cr = GETJSAMPLE(inptr2[col]);
y = inptr0[col];
cb = inptr1[col];
cr = inptr2[col];
/* Range-limiting is essential due to noise introduced by DCT losses. */
outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */
outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */
@ -560,7 +560,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
SCALEBITS)))];
outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */
/* K passes through unchanged */
outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */
outptr[3] = inptr3[col];
outptr += 4;
}
}

@ -5,6 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
* Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -39,24 +40,6 @@ typedef struct {
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
/* This macro is to work around compilers with missing or broken
* structure assignment. You'll need to fix this code if you have
* such a compiler and you change MAX_COMPS_IN_SCAN.
*/
#ifndef NO_STRUCT_ASSIGN
#define ASSIGN_STATE(dest, src) ((dest) = (src))
#else
#if MAX_COMPS_IN_SCAN == 4
#define ASSIGN_STATE(dest, src) \
((dest).last_dc_val[0] = (src).last_dc_val[0], \
(dest).last_dc_val[1] = (src).last_dc_val[1], \
(dest).last_dc_val[2] = (src).last_dc_val[2], \
(dest).last_dc_val[3] = (src).last_dc_val[3])
#endif
#endif
typedef struct {
struct jpeg_entropy_decoder pub; /* public fields */
@ -325,7 +308,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
bytes_in_buffer = cinfo->src->bytes_in_buffer;
}
bytes_in_buffer--;
c = GETJOCTET(*next_input_byte++);
c = *next_input_byte++;
/* If it's 0xFF, check and discard stuffed zero byte */
if (c == 0xFF) {
@ -342,7 +325,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
bytes_in_buffer = cinfo->src->bytes_in_buffer;
}
bytes_in_buffer--;
c = GETJOCTET(*next_input_byte++);
c = *next_input_byte++;
} while (c == 0xFF);
if (c == 0) {
@ -405,8 +388,8 @@ no_more_bytes:
#define GET_BYTE { \
register int c0, c1; \
c0 = GETJOCTET(*buffer++); \
c1 = GETJOCTET(*buffer); \
c0 = *buffer++; \
c1 = *buffer; \
/* Pre-execute most common case */ \
get_buffer = (get_buffer << 8) | c0; \
bits_left += 8; \
@ -423,7 +406,7 @@ no_more_bytes:
} \
}
#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
/* Pre-fetch 48 bytes, because the holding register is 64-bit */
#define FILL_BIT_BUFFER_FAST \
@ -557,6 +540,12 @@ process_restart(j_decompress_ptr cinfo)
}
#if defined(__has_feature)
#if __has_feature(undefined_behavior_sanitizer)
__attribute__((no_sanitize("signed-integer-overflow"),
no_sanitize("unsigned-integer-overflow")))
#endif
#endif
LOCAL(boolean)
decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
{
@ -568,7 +557,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
ASSIGN_STATE(state, entropy->saved);
state = entropy->saved;
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@ -589,11 +578,15 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
/* Convert DC difference to actual value, update last_dc_val */
int ci = cinfo->MCU_membership[blkn];
/* This is really just
* s += state.last_dc_val[ci];
* It is written this way in order to shut up UBSan.
/* Certain malformed JPEG images produce repeated DC coefficient
* differences of 2047 or -2047, which causes state.last_dc_val[ci] to
* grow until it overflows or underflows a 32-bit signed integer. This
* behavior is, to the best of our understanding, innocuous, and it is
* unclear how to work around it without potentially affecting
* performance. Thus, we (hopefully temporarily) suppress UBSan integer
* overflow errors for this function.
*/
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
s += state.last_dc_val[ci];
state.last_dc_val[ci] = s;
if (block) {
/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
@ -653,7 +646,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Completed MCU, so update state */
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
ASSIGN_STATE(entropy->saved, state);
entropy->saved = state;
return TRUE;
}
@ -671,7 +664,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
buffer = (JOCTET *)br_state.next_input_byte;
ASSIGN_STATE(state, entropy->saved);
state = entropy->saved;
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@ -688,7 +681,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
int ci = cinfo->MCU_membership[blkn];
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
s += state.last_dc_val[ci];
state.last_dc_val[ci] = s;
if (block)
(*block)[0] = (JCOEF)s;
@ -740,7 +733,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
br_state.next_input_byte = buffer;
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
ASSIGN_STATE(entropy->saved, state);
entropy->saved = state;
return TRUE;
}
@ -795,7 +788,8 @@ use_slow:
}
/* Account for restart interval (no-op if not using restarts) */
entropy->restarts_to_go--;
if (cinfo->restart_interval)
entropy->restarts_to_go--;
return TRUE;
}

@ -4,7 +4,8 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2010-2011, 2015-2016, D. R. Commander.
* Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander.
* Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
typedef size_t bit_buf_type; /* type of bit-extraction buffer */
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
#elif defined(__x86_64__) && defined(__ILP32__)
typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
#else
typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */
@ -228,7 +234,10 @@ slowlabel: \
s |= GET_BITS(1); \
nb++; \
} \
s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
if (nb > 16) \
s = 0; \
else \
s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
}
/* Out-of-line case for Huffman code fetching */

@ -38,18 +38,18 @@ marker_is_icc(jpeg_saved_marker_ptr marker)
marker->marker == ICC_MARKER &&
marker->data_length >= ICC_OVERHEAD_LEN &&
/* verify the identifying string */
GETJOCTET(marker->data[0]) == 0x49 &&
GETJOCTET(marker->data[1]) == 0x43 &&
GETJOCTET(marker->data[2]) == 0x43 &&
GETJOCTET(marker->data[3]) == 0x5F &&
GETJOCTET(marker->data[4]) == 0x50 &&
GETJOCTET(marker->data[5]) == 0x52 &&
GETJOCTET(marker->data[6]) == 0x4F &&
GETJOCTET(marker->data[7]) == 0x46 &&
GETJOCTET(marker->data[8]) == 0x49 &&
GETJOCTET(marker->data[9]) == 0x4C &&
GETJOCTET(marker->data[10]) == 0x45 &&
GETJOCTET(marker->data[11]) == 0x0;
marker->data[0] == 0x49 &&
marker->data[1] == 0x43 &&
marker->data[2] == 0x43 &&
marker->data[3] == 0x5F &&
marker->data[4] == 0x50 &&
marker->data[5] == 0x52 &&
marker->data[6] == 0x4F &&
marker->data[7] == 0x46 &&
marker->data[8] == 0x49 &&
marker->data[9] == 0x4C &&
marker->data[10] == 0x45 &&
marker->data[11] == 0x0;
}
@ -102,12 +102,12 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) {
if (marker_is_icc(marker)) {
if (num_markers == 0)
num_markers = GETJOCTET(marker->data[13]);
else if (num_markers != GETJOCTET(marker->data[13])) {
num_markers = marker->data[13];
else if (num_markers != marker->data[13]) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* inconsistent num_markers fields */
return FALSE;
}
seq_no = GETJOCTET(marker->data[12]);
seq_no = marker->data[12];
if (seq_no <= 0 || seq_no > num_markers) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* bogus sequence number */
return FALSE;
@ -154,7 +154,7 @@ jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
JOCTET FAR *src_ptr;
JOCTET *dst_ptr;
unsigned int length;
seq_no = GETJOCTET(marker->data[12]);
seq_no = marker->data[12];
dst_ptr = icc_data + data_offset[seq_no];
src_ptr = marker->data + ICC_OVERHEAD_LEN;
length = data_length[seq_no];

@ -151,7 +151,7 @@ typedef my_marker_reader *my_marker_ptr;
#define INPUT_BYTE(cinfo, V, action) \
MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
bytes_in_buffer--; \
V = GETJOCTET(*next_input_byte++); )
V = *next_input_byte++; )
/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
* V should be declared unsigned int or perhaps JLONG.
@ -159,10 +159,10 @@ typedef my_marker_reader *my_marker_ptr;
#define INPUT_2BYTES(cinfo, V, action) \
MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
bytes_in_buffer--; \
V = ((unsigned int)GETJOCTET(*next_input_byte++)) << 8; \
V = ((unsigned int)(*next_input_byte++)) << 8; \
MAKE_BYTE_AVAIL(cinfo, action); \
bytes_in_buffer--; \
V += GETJOCTET(*next_input_byte++); )
V += *next_input_byte++; )
/*
@ -608,18 +608,18 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
JLONG totallen = (JLONG)datalen + remaining;
if (datalen >= APP0_DATA_LEN &&
GETJOCTET(data[0]) == 0x4A &&
GETJOCTET(data[1]) == 0x46 &&
GETJOCTET(data[2]) == 0x49 &&
GETJOCTET(data[3]) == 0x46 &&
GETJOCTET(data[4]) == 0) {
data[0] == 0x4A &&
data[1] == 0x46 &&
data[2] == 0x49 &&
data[3] == 0x46 &&
data[4] == 0) {
/* Found JFIF APP0 marker: save info */
cinfo->saw_JFIF_marker = TRUE;
cinfo->JFIF_major_version = GETJOCTET(data[5]);
cinfo->JFIF_minor_version = GETJOCTET(data[6]);
cinfo->density_unit = GETJOCTET(data[7]);
cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
cinfo->JFIF_major_version = data[5];
cinfo->JFIF_minor_version = data[6];
cinfo->density_unit = data[7];
cinfo->X_density = (data[8] << 8) + data[9];
cinfo->Y_density = (data[10] << 8) + data[11];
/* Check version.
* Major version must be 1, anything else signals an incompatible change.
* (We used to treat this as an error, but now it's a nonfatal warning,
@ -634,24 +634,22 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
/* Validate thumbnail dimensions and issue appropriate messages */
if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
GETJOCTET(data[12]), GETJOCTET(data[13]));
if (data[12] | data[13])
TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, data[12], data[13]);
totallen -= APP0_DATA_LEN;
if (totallen !=
((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG)3))
if (totallen != ((JLONG)data[12] * (JLONG)data[13] * (JLONG)3))
TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen);
} else if (datalen >= 6 &&
GETJOCTET(data[0]) == 0x4A &&
GETJOCTET(data[1]) == 0x46 &&
GETJOCTET(data[2]) == 0x58 &&
GETJOCTET(data[3]) == 0x58 &&
GETJOCTET(data[4]) == 0) {
data[0] == 0x4A &&
data[1] == 0x46 &&
data[2] == 0x58 &&
data[3] == 0x58 &&
data[4] == 0) {
/* Found JFIF "JFXX" extension APP0 marker */
/* The library doesn't actually do anything with these,
* but we try to produce a helpful trace message.
*/
switch (GETJOCTET(data[5])) {
switch (data[5]) {
case 0x10:
TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen);
break;
@ -662,8 +660,7 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen);
break;
default:
TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
GETJOCTET(data[5]), (int)totallen);
TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, data[5], (int)totallen);
break;
}
} else {
@ -684,16 +681,16 @@ examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
unsigned int version, flags0, flags1, transform;
if (datalen >= APP14_DATA_LEN &&
GETJOCTET(data[0]) == 0x41 &&
GETJOCTET(data[1]) == 0x64 &&
GETJOCTET(data[2]) == 0x6F &&
GETJOCTET(data[3]) == 0x62 &&
GETJOCTET(data[4]) == 0x65) {
data[0] == 0x41 &&
data[1] == 0x64 &&
data[2] == 0x6F &&
data[3] == 0x62 &&
data[4] == 0x65) {
/* Found Adobe APP14 marker */
version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
transform = GETJOCTET(data[11]);
version = (data[5] << 8) + data[6];
flags0 = (data[7] << 8) + data[8];
flags1 = (data[9] << 8) + data[10];
transform = data[11];
TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
cinfo->saw_Adobe_marker = TRUE;
cinfo->Adobe_transform = (UINT8)transform;

@ -5,7 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2002-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2016, D. R. Commander.
* Copyright (C) 2009-2011, 2016, 2019, D. R. Commander.
* Copyright (C) 2013, Linaro Limited.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
@ -22,7 +22,6 @@
#include "jpeglib.h"
#include "jpegcomp.h"
#include "jdmaster.h"
#include "jsimd.h"
/*
@ -70,17 +69,6 @@ use_merged_upsample(j_decompress_ptr cinfo)
cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
return FALSE;
#ifdef WITH_SIMD
/* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling
isn't, then disabling merged upsampling is likely to be faster when
decompressing YCbCr JPEG images. */
if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() &&
jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr &&
(cinfo->out_color_space == JCS_RGB ||
(cinfo->out_color_space >= JCS_EXT_RGB &&
cinfo->out_color_space <= JCS_EXT_ARGB)))
return FALSE;
#endif
/* ??? also need to test for upsample-time rescaling, when & if supported */
return TRUE; /* by golly, it'll work... */
#else
@ -580,6 +568,7 @@ master_selection(j_decompress_ptr cinfo)
*/
cinfo->master->first_iMCU_col = 0;
cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1;
cinfo->master->last_good_iMCU_row = 0;
#ifdef D_MULTISCAN_FILES_SUPPORTED
/* If jpeg_start_decompress will read the whole file, initialize

@ -43,20 +43,20 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each pair of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cb = *inptr1++;
cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 2 Y values and emit 2 pixels */
y = GETJSAMPLE(*inptr0++);
y = *inptr0++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr0++);
y = *inptr0++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@ -68,12 +68,12 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cb = *inptr1;
cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr0);
y = *inptr0;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@ -115,21 +115,21 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* Loop for each pair of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cb = *inptr1++;
cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 2 Y values and emit 2 pixels */
y = GETJSAMPLE(*inptr0++);
y = *inptr0++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr0++);
y = *inptr0++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
@ -142,12 +142,12 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cb = *inptr1;
cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr0);
y = *inptr0;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
@ -189,20 +189,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each group of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cb = *inptr1++;
cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
y = GETJSAMPLE(*inptr00++);
y = *inptr00++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr00++);
y = *inptr00++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@ -211,13 +211,13 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
WRITE_TWO_PIXELS(outptr0, rgb);
outptr0 += 4;
y = GETJSAMPLE(*inptr01++);
y = *inptr01++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr01++);
y = *inptr01++;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@ -229,20 +229,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cb = *inptr1;
cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr00);
y = *inptr00;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr0 = (INT16)rgb;
y = GETJSAMPLE(*inptr01);
y = *inptr01;
r = range_limit[y + cred];
g = range_limit[y + cgreen];
b = range_limit[y + cblue];
@ -287,21 +287,21 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* Loop for each group of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cb = *inptr1++;
cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
y = GETJSAMPLE(*inptr00++);
y = *inptr00++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
d0 = DITHER_ROTATE(d0);
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr00++);
y = *inptr00++;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
@ -311,14 +311,14 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
WRITE_TWO_PIXELS(outptr0, rgb);
outptr0 += 4;
y = GETJSAMPLE(*inptr01++);
y = *inptr01++;
r = range_limit[DITHER_565_R(y + cred, d1)];
g = range_limit[DITHER_565_G(y + cgreen, d1)];
b = range_limit[DITHER_565_B(y + cblue, d1)];
d1 = DITHER_ROTATE(d1);
rgb = PACK_SHORT_565(r, g, b);
y = GETJSAMPLE(*inptr01++);
y = *inptr01++;
r = range_limit[DITHER_565_R(y + cred, d1)];
g = range_limit[DITHER_565_G(y + cgreen, d1)];
b = range_limit[DITHER_565_B(y + cblue, d1)];
@ -331,20 +331,20 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cb = *inptr1;
cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr00);
y = *inptr00;
r = range_limit[DITHER_565_R(y + cred, d0)];
g = range_limit[DITHER_565_G(y + cgreen, d0)];
b = range_limit[DITHER_565_B(y + cblue, d0)];
rgb = PACK_SHORT_565(r, g, b);
*(INT16 *)outptr0 = (INT16)rgb;
y = GETJSAMPLE(*inptr01);
y = *inptr01;
r = range_limit[DITHER_565_R(y + cred, d1)];
g = range_limit[DITHER_565_G(y + cgreen, d1)];
b = range_limit[DITHER_565_B(y + cblue, d1)];

@ -46,13 +46,13 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each pair of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cb = *inptr1++;
cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 2 Y values and emit 2 pixels */
y = GETJSAMPLE(*inptr0++);
y = *inptr0++;
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
@ -60,7 +60,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr[RGB_ALPHA] = 0xFF;
#endif
outptr += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr0++);
y = *inptr0++;
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
@ -71,12 +71,12 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
}
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cb = *inptr1;
cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr0);
y = *inptr0;
outptr[RGB_RED] = range_limit[y + cred];
outptr[RGB_GREEN] = range_limit[y + cgreen];
outptr[RGB_BLUE] = range_limit[y + cblue];
@ -120,13 +120,13 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Loop for each group of output pixels */
for (col = cinfo->output_width >> 1; col > 0; col--) {
/* Do the chroma part of the calculation */
cb = GETJSAMPLE(*inptr1++);
cr = GETJSAMPLE(*inptr2++);
cb = *inptr1++;
cr = *inptr2++;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
/* Fetch 4 Y values and emit 4 pixels */
y = GETJSAMPLE(*inptr00++);
y = *inptr00++;
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
@ -134,7 +134,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr0[RGB_ALPHA] = 0xFF;
#endif
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr00++);
y = *inptr00++;
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
@ -142,7 +142,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr0[RGB_ALPHA] = 0xFF;
#endif
outptr0 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
y = *inptr01++;
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
@ -150,7 +150,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
outptr1[RGB_ALPHA] = 0xFF;
#endif
outptr1 += RGB_PIXELSIZE;
y = GETJSAMPLE(*inptr01++);
y = *inptr01++;
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];
@ -161,19 +161,19 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
}
/* If image width is odd, do the last output column separately */
if (cinfo->output_width & 1) {
cb = GETJSAMPLE(*inptr1);
cr = GETJSAMPLE(*inptr2);
cb = *inptr1;
cr = *inptr2;
cred = Crrtab[cr];
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
cblue = Cbbtab[cb];
y = GETJSAMPLE(*inptr00);
y = *inptr00;
outptr0[RGB_RED] = range_limit[y + cred];
outptr0[RGB_GREEN] = range_limit[y + cgreen];
outptr0[RGB_BLUE] = range_limit[y + cblue];
#ifdef RGB_ALPHA
outptr0[RGB_ALPHA] = 0xFF;
#endif
y = GETJSAMPLE(*inptr01);
y = *inptr01;
outptr1[RGB_RED] = range_limit[y + cred];
outptr1[RGB_GREEN] = range_limit[y + cgreen];
outptr1[RGB_BLUE] = range_limit[y + cblue];

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1995-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2015-2016, 2018, D. R. Commander.
* Copyright (C) 2015-2016, 2018-2021, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -41,25 +41,6 @@ typedef struct {
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
/* This macro is to work around compilers with missing or broken
* structure assignment. You'll need to fix this code if you have
* such a compiler and you change MAX_COMPS_IN_SCAN.
*/
#ifndef NO_STRUCT_ASSIGN
#define ASSIGN_STATE(dest, src) ((dest) = (src))
#else
#if MAX_COMPS_IN_SCAN == 4
#define ASSIGN_STATE(dest, src) \
((dest).EOBRUN = (src).EOBRUN, \
(dest).last_dc_val[0] = (src).last_dc_val[0], \
(dest).last_dc_val[1] = (src).last_dc_val[1], \
(dest).last_dc_val[2] = (src).last_dc_val[2], \
(dest).last_dc_val[3] = (src).last_dc_val[3])
#endif
#endif
typedef struct {
struct jpeg_entropy_decoder pub; /* public fields */
@ -102,7 +83,7 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
boolean is_DC_band, bad;
int ci, coefi, tbl;
d_derived_tbl **pdtbl;
int *coef_bit_ptr;
int *coef_bit_ptr, *prev_coef_bit_ptr;
jpeg_component_info *compptr;
is_DC_band = (cinfo->Ss == 0);
@ -143,8 +124,15 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
int cindex = cinfo->cur_comp_info[ci]->component_index;
coef_bit_ptr = &cinfo->coef_bits[cindex][0];
prev_coef_bit_ptr = &cinfo->coef_bits[cindex + cinfo->num_components][0];
if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
if (cinfo->input_scan_number > 1)
prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
else
prev_coef_bit_ptr[coefi] = 0;
}
for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
if (cinfo->Ah != expected)
@ -323,7 +311,7 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Load up working state */
BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
ASSIGN_STATE(state, entropy->saved);
state = entropy->saved;
/* Outer loop handles each block in the MCU */
@ -356,11 +344,12 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
/* Completed MCU, so update state */
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
ASSIGN_STATE(entropy->saved, state);
entropy->saved = state;
}
/* Account for restart interval (no-op if not using restarts) */
entropy->restarts_to_go--;
if (cinfo->restart_interval)
entropy->restarts_to_go--;
return TRUE;
}
@ -444,7 +433,8 @@ decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
}
/* Account for restart interval (no-op if not using restarts) */
entropy->restarts_to_go--;
if (cinfo->restart_interval)
entropy->restarts_to_go--;
return TRUE;
}
@ -495,7 +485,8 @@ decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
/* Account for restart interval (no-op if not using restarts) */
entropy->restarts_to_go--;
if (cinfo->restart_interval)
entropy->restarts_to_go--;
return TRUE;
}
@ -638,7 +629,8 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
}
/* Account for restart interval (no-op if not using restarts) */
entropy->restarts_to_go--;
if (cinfo->restart_interval)
entropy->restarts_to_go--;
return TRUE;
@ -676,7 +668,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo)
/* Create progression status table */
cinfo->coef_bits = (int (*)[DCTSIZE2])
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
cinfo->num_components * DCTSIZE2 *
cinfo->num_components * 2 * DCTSIZE2 *
sizeof(int));
coef_bit_ptr = &cinfo->coef_bits[0][0];
for (ci = 0; ci < cinfo->num_components; ci++)

@ -8,7 +8,7 @@
* Copyright (C) 2010, 2015-2016, D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2019, Arm Limited.
* Copyright (C) 2019-2020, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -177,7 +177,7 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow];
outend = outptr + cinfo->output_width;
while (outptr < outend) {
invalue = *inptr++; /* don't need GETJSAMPLE() here */
invalue = *inptr++;
for (h = h_expand; h > 0; h--) {
*outptr++ = invalue;
}
@ -213,7 +213,7 @@ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[inrow];
outend = outptr + cinfo->output_width;
while (outptr < outend) {
invalue = *inptr++; /* don't need GETJSAMPLE() here */
invalue = *inptr++;
*outptr++ = invalue;
*outptr++ = invalue;
}
@ -242,7 +242,7 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow];
outend = outptr + cinfo->output_width;
while (outptr < outend) {
invalue = *inptr++; /* don't need GETJSAMPLE() here */
invalue = *inptr++;
*outptr++ = invalue;
*outptr++ = invalue;
}
@ -283,20 +283,20 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
inptr = input_data[inrow];
outptr = output_data[inrow];
/* Special case for first column */
invalue = GETJSAMPLE(*inptr++);
invalue = *inptr++;
*outptr++ = (JSAMPLE)invalue;
*outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2);
*outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
/* General case: 3/4 * nearer pixel + 1/4 * further pixel */
invalue = GETJSAMPLE(*inptr++) * 3;
*outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2);
*outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(*inptr) + 2) >> 2);
invalue = (*inptr++) * 3;
*outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
*outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2);
}
/* Special case for last column */
invalue = GETJSAMPLE(*inptr);
*outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2);
invalue = *inptr;
*outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
*outptr++ = (JSAMPLE)invalue;
}
}
@ -338,7 +338,7 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow++];
for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
thiscolsum = (*inptr0++) * 3 + (*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
}
}
@ -381,8 +381,8 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
outptr = output_data[outrow++];
/* Special case for first column */
thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
thiscolsum = (*inptr0++) * 3 + (*inptr1++);
nextcolsum = (*inptr0++) * 3 + (*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4);
*outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
lastcolsum = thiscolsum; thiscolsum = nextcolsum;
@ -390,7 +390,7 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
/* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
/* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
nextcolsum = (*inptr0++) * 3 + (*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
*outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
lastcolsum = thiscolsum; thiscolsum = nextcolsum;
@ -477,7 +477,13 @@ jinit_upsampler(j_decompress_ptr cinfo)
} else if (h_in_group == h_out_group &&
v_in_group * 2 == v_out_group && do_fancy) {
/* Non-fancy upsampling is handled by the generic method */
upsample->methods[ci] = h1v2_fancy_upsample;
#if defined(__arm__) || defined(__aarch64__) || \
defined(_M_ARM) || defined(_M_ARM64)
if (jsimd_can_h1v2_fancy_upsample())
upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
else
#endif
upsample->methods[ci] = h1v2_fancy_upsample;
upsample->pub.need_context_rows = TRUE;
} else if (h_in_group * 2 == h_out_group &&
v_in_group * 2 == v_out_group) {

@ -207,6 +207,10 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
#endif
#endif
JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
#if JPEG_LIB_VERSION < 70
JMESSAGE(JERR_BAD_DROP_SAMPLING,
"Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
#endif
#ifdef JMAKE_ENUM_LIST
@ -252,6 +256,15 @@ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
(cinfo)->err->msg_parm.i[2] = (p3), \
(cinfo)->err->msg_parm.i[3] = (p4), \
(*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
#define ERREXIT6(cinfo, code, p1, p2, p3, p4, p5, p6) \
((cinfo)->err->msg_code = (code), \
(cinfo)->err->msg_parm.i[0] = (p1), \
(cinfo)->err->msg_parm.i[1] = (p2), \
(cinfo)->err->msg_parm.i[2] = (p3), \
(cinfo)->err->msg_parm.i[3] = (p4), \
(cinfo)->err->msg_parm.i[4] = (p5), \
(cinfo)->err->msg_parm.i[5] = (p6), \
(*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
#define ERREXITS(cinfo, code, str) \
((cinfo)->err->msg_code = (code), \
strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \

@ -3,7 +3,7 @@
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modification developed 2002-2009 by Guido Vollbeding.
* Modification developed 2002-2018 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
@ -417,7 +417,7 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
/*
* Perform dequantization and inverse DCT on one block of coefficients,
* producing a 7x7 output block.
* producing a reduced-size 7x7 output block.
*
* Optimized algorithm with 12 multiplications in the 1-D kernel.
* cK represents sqrt(2) * cos(K*pi/14).
@ -1258,7 +1258,7 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
/*
* Perform dequantization and inverse DCT on one block of coefficients,
* producing a 11x11 output block.
* producing an 11x11 output block.
*
* Optimized algorithm with 24 multiplications in the 1-D kernel.
* cK represents sqrt(2) * cos(K*pi/22).
@ -2398,7 +2398,7 @@ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
/* Add fudge factor here for final descale. */
tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1);
tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */

@ -43,25 +43,11 @@
#if BITS_IN_JSAMPLE == 8
/* JSAMPLE should be the smallest type that will hold the values 0..255.
* You can use a signed char by having GETJSAMPLE mask it with 0xFF.
*/
#ifdef HAVE_UNSIGNED_CHAR
typedef unsigned char JSAMPLE;
#define GETJSAMPLE(value) ((int)(value))
#else /* not HAVE_UNSIGNED_CHAR */
typedef char JSAMPLE;
#ifdef __CHAR_UNSIGNED__
#define GETJSAMPLE(value) ((int)(value))
#else
#define GETJSAMPLE(value) ((int)(value) & 0xFF)
#endif /* __CHAR_UNSIGNED__ */
#endif /* HAVE_UNSIGNED_CHAR */
#define MAXJSAMPLE 255
#define CENTERJSAMPLE 128
@ -97,22 +83,9 @@ typedef short JCOEF;
* managers, this is also the data type passed to fread/fwrite.
*/
#ifdef HAVE_UNSIGNED_CHAR
typedef unsigned char JOCTET;
#define GETJOCTET(value) (value)
#else /* not HAVE_UNSIGNED_CHAR */
typedef char JOCTET;
#ifdef __CHAR_UNSIGNED__
#define GETJOCTET(value) (value)
#else
#define GETJOCTET(value) ((value) & 0xFF)
#endif /* __CHAR_UNSIGNED__ */
#endif /* HAVE_UNSIGNED_CHAR */
/* These typedefs are used for various table entries and so forth.
* They must be at least as wide as specified; but making them too big
@ -123,15 +96,7 @@ typedef char JOCTET;
/* UINT8 must hold at least the values 0..255. */
#ifdef HAVE_UNSIGNED_CHAR
typedef unsigned char UINT8;
#else /* not HAVE_UNSIGNED_CHAR */
#ifdef __CHAR_UNSIGNED__
typedef char UINT8;
#else /* not __CHAR_UNSIGNED__ */
typedef short UINT8;
#endif /* __CHAR_UNSIGNED__ */
#endif /* HAVE_UNSIGNED_CHAR */
/* UINT16 must hold at least the values 0..65535. */

@ -5,7 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 1997-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2015-2016, D. R. Commander.
* Copyright (C) 2015-2016, 2019, D. R. Commander.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@ -158,6 +158,9 @@ struct jpeg_decomp_master {
JDIMENSION first_MCU_col[MAX_COMPONENTS];
JDIMENSION last_MCU_col[MAX_COMPONENTS];
boolean jinit_upsampler_no_alloc;
/* Last iMCU row that was successfully decoded */
JDIMENSION last_good_iMCU_row;
};
/* Input control module */

@ -479,7 +479,7 @@ color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
for (col = width; col > 0; col--) {
pixcode = 0;
for (ci = 0; ci < nc; ci++) {
pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
pixcode += colorindex[ci][*ptrin++];
}
*ptrout++ = (JSAMPLE)pixcode;
}
@ -506,9 +506,9 @@ color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
ptrin = input_buf[row];
ptrout = output_buf[row];
for (col = width; col > 0; col--) {
pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
pixcode = colorindex0[*ptrin++];
pixcode += colorindex1[*ptrin++];
pixcode += colorindex2[*ptrin++];
*ptrout++ = (JSAMPLE)pixcode;
}
}
@ -552,7 +552,7 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
* required amount of padding.
*/
*output_ptr +=
colorindex_ci[GETJSAMPLE(*input_ptr) + dither[col_index]];
colorindex_ci[*input_ptr + dither[col_index]];
input_ptr += nc;
output_ptr++;
col_index = (col_index + 1) & ODITHER_MASK;
@ -595,12 +595,9 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
col_index = 0;
for (col = width; col > 0; col--) {
pixcode =
GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + dither0[col_index]]);
pixcode +=
GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + dither1[col_index]]);
pixcode +=
GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + dither2[col_index]]);
pixcode = colorindex0[(*input_ptr++) + dither0[col_index]];
pixcode += colorindex1[(*input_ptr++) + dither1[col_index]];
pixcode += colorindex2[(*input_ptr++) + dither2[col_index]];
*output_ptr++ = (JSAMPLE)pixcode;
col_index = (col_index + 1) & ODITHER_MASK;
}
@ -677,15 +674,15 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
* The maximum error is +- MAXJSAMPLE; this sets the required size
* of the range_limit array.
*/
cur += GETJSAMPLE(*input_ptr);
cur = GETJSAMPLE(range_limit[cur]);
cur += *input_ptr;
cur = range_limit[cur];
/* Select output value, accumulate into output code for this pixel */
pixcode = GETJSAMPLE(colorindex_ci[cur]);
pixcode = colorindex_ci[cur];
*output_ptr += (JSAMPLE)pixcode;
/* Compute actual representation error at this pixel */
/* Note: we can do this even though we don't have the final */
/* pixel code, because the colormap is orthogonal. */
cur -= GETJSAMPLE(colormap_ci[pixcode]);
cur -= colormap_ci[pixcode];
/* Compute error fractions to be propagated to adjacent pixels.
* Add these into the running sums, and simultaneously shift the
* next-line error sums left by 1 column.

@ -215,9 +215,9 @@ prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
ptr = input_buf[row];
for (col = width; col > 0; col--) {
/* get pixel value and index into the histogram */
histp = &histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
[GETJSAMPLE(ptr[1]) >> C1_SHIFT]
[GETJSAMPLE(ptr[2]) >> C2_SHIFT];
histp = &histogram[ptr[0] >> C0_SHIFT]
[ptr[1] >> C1_SHIFT]
[ptr[2] >> C2_SHIFT];
/* increment, check for overflow and undo increment if so. */
if (++(*histp) <= 0)
(*histp)--;
@ -665,7 +665,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
for (i = 0; i < numcolors; i++) {
/* We compute the squared-c0-distance term, then add in the other two. */
x = GETJSAMPLE(cinfo->colormap[0][i]);
x = cinfo->colormap[0][i];
if (x < minc0) {
tdist = (x - minc0) * C0_SCALE;
min_dist = tdist * tdist;
@ -688,7 +688,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
}
}
x = GETJSAMPLE(cinfo->colormap[1][i]);
x = cinfo->colormap[1][i];
if (x < minc1) {
tdist = (x - minc1) * C1_SCALE;
min_dist += tdist * tdist;
@ -710,7 +710,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
}
}
x = GETJSAMPLE(cinfo->colormap[2][i]);
x = cinfo->colormap[2][i];
if (x < minc2) {
tdist = (x - minc2) * C2_SCALE;
min_dist += tdist * tdist;
@ -788,13 +788,13 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
#define STEP_C2 ((1 << C2_SHIFT) * C2_SCALE)
for (i = 0; i < numcolors; i++) {
icolor = GETJSAMPLE(colorlist[i]);
icolor = colorlist[i];
/* Compute (square of) distance from minc0/c1/c2 to this color */
inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE;
dist0 = inc0 * inc0;
inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE;
dist0 += inc1 * inc1;
inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE;
dist0 += inc2 * inc2;
/* Form the initial difference increments */
inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
@ -879,7 +879,7 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2)
for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
cachep = &histogram[c0 + ic0][c1 + ic1][c2];
for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
*cachep++ = (histcell)(GETJSAMPLE(*cptr++) + 1);
*cachep++ = (histcell)((*cptr++) + 1);
}
}
}
@ -909,9 +909,9 @@ pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
outptr = output_buf[row];
for (col = width; col > 0; col--) {
/* get pixel value and index into the cache */
c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
c0 = (*inptr++) >> C0_SHIFT;
c1 = (*inptr++) >> C1_SHIFT;
c2 = (*inptr++) >> C2_SHIFT;
cachep = &histogram[c0][c1][c2];
/* If we have not seen this color before, find nearest colormap entry */
/* and update the cache */
@ -996,12 +996,12 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
* The maximum error is +- MAXJSAMPLE (or less with error limiting);
* this sets the required size of the range_limit array.
*/
cur0 += GETJSAMPLE(inptr[0]);
cur1 += GETJSAMPLE(inptr[1]);
cur2 += GETJSAMPLE(inptr[2]);
cur0 = GETJSAMPLE(range_limit[cur0]);
cur1 = GETJSAMPLE(range_limit[cur1]);
cur2 = GETJSAMPLE(range_limit[cur2]);
cur0 += inptr[0];
cur1 += inptr[1];
cur2 += inptr[2];
cur0 = range_limit[cur0];
cur1 = range_limit[cur1];
cur2 = range_limit[cur2];
/* Index into the cache with adjusted pixel value */
cachep =
&histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT];
@ -1015,9 +1015,9 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
register int pixcode = *cachep - 1;
*outptr = (JSAMPLE)pixcode;
/* Compute representation error for this pixel */
cur0 -= GETJSAMPLE(colormap0[pixcode]);
cur1 -= GETJSAMPLE(colormap1[pixcode]);
cur2 -= GETJSAMPLE(colormap2[pixcode]);
cur0 -= colormap0[pixcode];
cur1 -= colormap1[pixcode];
cur2 -= colormap2[pixcode];
}
/* Compute error fractions to be propagated to adjacent pixels.
* Add these into the running sums, and simultaneously shift the

@ -4,6 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, 2014, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -75,6 +76,7 @@ EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo,
EXTERN(int) jsimd_can_h2v2_fancy_upsample(void);
EXTERN(int) jsimd_can_h2v1_fancy_upsample(void);
EXTERN(int) jsimd_can_h1v2_fancy_upsample(void);
EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
jpeg_component_info *compptr,
@ -84,6 +86,10 @@ EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
jpeg_component_info *compptr,
JSAMPARRAY input_data,
JSAMPARRAY *output_data_ptr);
EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,
jpeg_component_info *compptr,
JSAMPARRAY input_data,
JSAMPARRAY *output_data_ptr);
EXTERN(int) jsimd_can_h2v2_merged_upsample(void);
EXTERN(int) jsimd_can_h2v1_merged_upsample(void);

@ -4,6 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -169,6 +170,12 @@ jsimd_can_h2v1_fancy_upsample(void)
return 0;
}
GLOBAL(int)
jsimd_can_h1v2_fancy_upsample(void)
{
return 0;
}
GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
@ -181,6 +188,12 @@ jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
{
}
GLOBAL(void)
jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
}
GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)
{

@ -2,9 +2,9 @@
* jversion.h
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
* Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2010, 2012-2020, D. R. Commander.
* Copyright (C) 2010, 2012-2021, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -37,9 +37,9 @@
*/
#define JCOPYRIGHT \
"Copyright (C) 2009-2020 D. R. Commander\n" \
"Copyright (C) 2009-2021 D. R. Commander\n" \
"Copyright (C) 2015, 2020 Google, Inc.\n" \
"Copyright (C) 2019 Arm Limited\n" \
"Copyright (C) 2019-2020 Arm Limited\n" \
"Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
"Copyright (C) 2015 Intel Corporation\n" \
@ -48,7 +48,7 @@
"Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
"Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding"
"Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
#define JCOPYRIGHT_SHORT \
"Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"
"Copyright (C) 1991-2021 The libjpeg-turbo Project and many others"

@ -239,7 +239,7 @@ OCV_OPTION(WITH_CAP_IOS "Enable iOS video capture" ON
VISIBLE_IF IOS
VERIFY HAVE_CAP_IOS)
OCV_OPTION(WITH_CAROTENE "Use NVidia carotene acceleration library for ARM platform" ON
VISIBLE_IF (ARM OR AARCH64) AND NOT IOS AND NOT (CMAKE_VERSION VERSION_LESS "2.8.11"))
VISIBLE_IF (ARM OR AARCH64) AND NOT IOS)
OCV_OPTION(WITH_CPUFEATURES "Use cpufeatures Android library" ON
VISIBLE_IF ANDROID
VERIFY HAVE_CPUFEATURES)
@ -499,7 +499,7 @@ OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors"
OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with native libraries" OFF IF ANDROID )
OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF )
OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
OCV_OPTION(ENABLE_GNU_STL_DEBUG "Enable GNU STL Debug mode (defines _GLIBCXX_DEBUG)" OFF IF ((NOT CMAKE_VERSION VERSION_LESS "2.8.11") AND CV_GCC) )
OCV_OPTION(ENABLE_GNU_STL_DEBUG "Enable GNU STL Debug mode (defines _GLIBCXX_DEBUG)" OFF IF CV_GCC )
OCV_OPTION(ENABLE_BUILD_HARDENING "Enable hardening of the resulting binaries (against security attacks, detects memory corruption, etc)" OFF)
OCV_OPTION(ENABLE_LTO "Enable Link Time Optimization" OFF IF CV_GCC OR MSVC)
OCV_OPTION(ENABLE_THIN_LTO "Enable Thin LTO" OFF IF CV_CLANG)
@ -511,6 +511,7 @@ OCV_OPTION(CV_TRACE "Enable OpenCV code trace" ON)
OCV_OPTION(OPENCV_GENERATE_SETUPVARS "Generate setup_vars* scripts" ON IF (NOT ANDROID AND NOT APPLE_FRAMEWORK) )
OCV_OPTION(ENABLE_CONFIG_VERIFICATION "Fail build if actual configuration doesn't match requested (WITH_XXX != HAVE_XXX)" OFF)
OCV_OPTION(OPENCV_ENABLE_MEMALIGN "Enable posix_memalign or memalign usage" ON)
OCV_OPTION(OPENCV_DISABLE_FILESYSTEM_SUPPORT "Disable filesystem support" OFF)
OCV_OPTION(ENABLE_PYLINT "Add target with Pylint checks" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) )
OCV_OPTION(ENABLE_FLAKE8 "Add target with Python flake8 checker" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) )
@ -522,6 +523,10 @@ if(ENABLE_IMPL_COLLECTION)
add_definitions(-DCV_COLLECT_IMPL_DATA)
endif()
if(OPENCV_DISABLE_FILESYSTEM_SUPPORT)
add_definitions(-DOPENCV_HAVE_FILESYSTEM_SUPPORT=0)
endif()
set(OPENCV_MATHJAX_RELPATH "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0" CACHE STRING "URI to a MathJax installation")
# ----------------------------------------------------------------------------
@ -655,6 +660,8 @@ if(UNIX)
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} m pthread)
elseif(EMSCRIPTEN)
# no need to link to system libs with emscripten
elseif(QNXNTO)
# no need to link to system libs with QNX
else()
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m pthread rt)
endif()
@ -1048,7 +1055,6 @@ endif()
status("")
status(" Platform:")
if(NOT DEFINED OPENCV_TIMESTAMP
AND NOT CMAKE_VERSION VERSION_LESS 2.8.11
AND NOT BUILD_INFO_SKIP_TIMESTAMP
)
string(TIMESTAMP OPENCV_TIMESTAMP "" UTC)
@ -1133,6 +1139,10 @@ endif()
status(" ccache:" OPENCV_COMPILER_IS_CCACHE THEN YES ELSE NO)
status(" Precompiled headers:" PCHSupport_FOUND AND ENABLE_PRECOMPILED_HEADERS THEN YES ELSE NO)
if(OPENCV_DISABLE_FILESYSTEM_SUPPORT)
status(" Filesystem support is disabled")
endif()
# ========================== Dependencies ============================
ocv_get_all_libs(deps_modules deps_extra deps_3rdparty)
status(" Extra dependencies:" ${deps_extra})
@ -1432,7 +1442,16 @@ if(WITH_LIBREALSENSE OR HAVE_LIBREALSENSE)
endif()
if(WITH_MFX OR HAVE_MFX)
status(" Intel Media SDK:" HAVE_MFX THEN "YES (${MFX_LIBRARY})" ELSE NO)
if(HAVE_MFX)
if(MFX_LIBRARY)
set(__details " (${MFX_LIBRARY})")
elseif(MFX_LIBRARIES)
set(__details " (${MFX_LIBRARIES})")
else()
set(__details " (unknown)")
endif()
endif()
status(" Intel Media SDK:" HAVE_MFX THEN "YES${__details}" ELSE NO)
endif()
if(WITH_GPHOTO2 OR HAVE_GPHOTO2)

@ -227,9 +227,11 @@ if(CV_GCC OR CV_CLANG)
if(APPLE)
set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS} -Wl,-dead_strip")
set(OPENCV_EXTRA_SHARED_LINKER_FLAGS "${OPENCV_EXTRA_SHARED_LINKER_FLAGS} -Wl,-dead_strip")
set(OPENCV_EXTRA_MODULE_LINKER_FLAGS "${OPENCV_EXTRA_MODULE_LINKER_FLAGS} -Wl,-dead_strip")
else()
set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS} -Wl,--gc-sections")
set(OPENCV_EXTRA_SHARED_LINKER_FLAGS "${OPENCV_EXTRA_SHARED_LINKER_FLAGS} -Wl,--gc-sections")
set(OPENCV_EXTRA_MODULE_LINKER_FLAGS "${OPENCV_EXTRA_MODULE_LINKER_FLAGS} -Wl,--gc-sections")
endif()
endif()
endif()
@ -281,6 +283,7 @@ if(MSVC)
set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi")
set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "${OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE} /debug")
set(OPENCV_EXTRA_SHARED_LINKER_FLAGS_RELEASE "${OPENCV_EXTRA_SHARED_LINKER_FLAGS_RELEASE} /debug")
set(OPENCV_EXTRA_MODULE_LINKER_FLAGS_RELEASE "${OPENCV_EXTRA_MODULE_LINKER_FLAGS_RELEASE} /debug")
endif()
# Remove unreferenced functions: function level linking
@ -350,6 +353,7 @@ if(NOT OPENCV_SKIP_LINK_AS_NEEDED)
if(HAVE_LINK_AS_NEEDED)
set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS} ${_option}")
set(OPENCV_EXTRA_SHARED_LINKER_FLAGS "${OPENCV_EXTRA_SHARED_LINKER_FLAGS} ${_option}")
set(OPENCV_EXTRA_MODULE_LINKER_FLAGS "${OPENCV_EXTRA_MODULE_LINKER_FLAGS} ${_option}")
endif()
endif()
endif()
@ -368,6 +372,9 @@ if(NOT OPENCV_SKIP_EXTRA_COMPILER_FLAGS)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OPENCV_EXTRA_SHARED_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${OPENCV_EXTRA_SHARED_LINKER_FLAGS_RELEASE}")
set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} ${OPENCV_EXTRA_SHARED_LINKER_FLAGS_DEBUG}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${OPENCV_EXTRA_MODULE_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} ${OPENCV_EXTRA_MODULE_LINKER_FLAGS_RELEASE}")
set(CMAKE_MODULE_LINKER_FLAGS_DEBUG "${CMAKE_MODULE_LINKER_FLAGS_DEBUG} ${OPENCV_EXTRA_MODULE_LINKER_FLAGS_DEBUG}")
endif()
if(MSVC)

@ -134,10 +134,14 @@ endif()
# Add more features to the target
if(INF_ENGINE_TARGET)
if(InferenceEngine_VERSION VERSION_GREATER_EQUAL "2021.4")
math(EXPR INF_ENGINE_RELEASE "${InferenceEngine_VERSION_MAJOR} * 1000000 + ${InferenceEngine_VERSION_MINOR} * 10000 + ${InferenceEngine_VERSION_PATCH} * 100")
endif()
if(NOT INF_ENGINE_RELEASE)
message(WARNING "InferenceEngine version has not been set, 2021.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
set(INF_ENGINE_RELEASE "2021030000")
endif()
set(INF_ENGINE_RELEASE "2021030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
set(INF_ENGINE_RELEASE "${INF_ENGINE_RELEASE}" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
)

@ -23,7 +23,7 @@ set(OPENCV_DOWNLOAD_LOG "${OpenCV_BINARY_DIR}/CMakeDownloadLog.txt")
set(OPENCV_DOWNLOAD_WITH_CURL "${OpenCV_BINARY_DIR}/download_with_curl.sh")
set(OPENCV_DOWNLOAD_WITH_WGET "${OpenCV_BINARY_DIR}/download_with_wget.sh")
set(OPENCV_DOWNLOAD_TRIES_LIST 1 CACHE STRING "List of download tries") # a list
set(OPENCV_DOWNLOAD_PARAMS INACTIVITY_TIMEOUT 60 TIMEOUT 600 CACHE STRING "Download parameters to be passed to file(DOWNLAOD ...)")
set(OPENCV_DOWNLOAD_PARAMS INACTIVITY_TIMEOUT 60 TIMEOUT 600 CACHE STRING "Download parameters to be passed to file(DOWNLOAD ...)")
mark_as_advanced(OPENCV_DOWNLOAD_TRIES_LIST OPENCV_DOWNLOAD_PARAMS)
# Init download cache directory and log file and helper scripts

@ -11,7 +11,7 @@ if(WITH_WIN32UI)
CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=user32;gdi32")
endif()
# --- QT4 ---
# --- QT4/5 ---
ocv_clear_vars(HAVE_QT HAVE_QT5)
if(WITH_QT)
if(NOT WITH_QT EQUAL 4)
@ -34,41 +34,6 @@ if(WITH_QT)
endif()
endif()
# --- GTK ---
ocv_clear_vars(HAVE_GTK HAVE_GTK3 HAVE_GTHREAD HAVE_GTKGLEXT)
if(WITH_GTK AND NOT HAVE_QT)
if(NOT WITH_GTK_2_X)
ocv_check_modules(GTK3 gtk+-3.0)
if(HAVE_GTK3)
ocv_append_build_options(HIGHGUI GTK3)
set(HAVE_GTK TRUE)
endif()
endif()
if(NOT HAVE_GTK)
ocv_check_modules(GTK2 gtk+-2.0)
if(HAVE_GTK2)
if (GTK2_VERSION VERSION_LESS MIN_VER_GTK)
message (FATAL_ERROR "GTK support requires a minimum version of ${MIN_VER_GTK} (${GTK2_VERSION} found)")
else()
ocv_append_build_options(HIGHGUI GTK2)
set(HAVE_GTK TRUE)
endif()
endif()
endif()
ocv_check_modules(GTHREAD gthread-2.0)
if(HAVE_GTK AND NOT HAVE_GTHREAD)
message(FATAL_ERROR "gthread not found. This library is required when building with GTK support")
else()
ocv_append_build_options(HIGHGUI GTHREAD)
endif()
if(WITH_OPENGL AND NOT HAVE_GTK3)
ocv_check_modules(GTKGLEXT gtkglext-1.0)
if(HAVE_GTKGLEXT)
ocv_append_build_options(HIGHGUI GTKGLEXT)
endif()
endif()
endif()
# --- OpenGl ---
ocv_clear_vars(HAVE_OPENGL HAVE_QT_OPENGL)
if(WITH_OPENGL)

@ -9,6 +9,14 @@
# OPENEXR_LIBRARIES = libraries that are needed to use OpenEXR.
#
find_package(OpenEXR 3.0 CONFIG QUIET)
if(TARGET OpenEXR::OpenEXR)
SET(OPENEXR_FOUND TRUE)
SET(OPENEXR_LIBRARIES OpenEXR::OpenEXR)
SET(OPENEXR_VERSION ${OpenEXR_VERSION})
return()
endif()
SET(OPENEXR_LIBRARIES "")
SET(OPENEXR_LIBSEARCH_SUFFIXES "")
file(TO_CMAKE_PATH "$ENV{ProgramFiles}" ProgramFiles_ENV_PATH)

@ -6,4 +6,3 @@ set(MIN_VER_CUDNN 7.5)
set(MIN_VER_PYTHON2 2.7)
set(MIN_VER_PYTHON3 3.2)
set(MIN_VER_ZLIB 1.2.3)
set(MIN_VER_GTK 2.18.0)

@ -880,7 +880,9 @@ macro(_ocv_create_module)
ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
set(__module_headers ${OPENCV_MODULE_${the_module}_HEADERS})
list(SORT __module_headers) # fix headers order, useful for bindings
if(__module_headers)
list(SORT __module_headers) # fix headers order, useful for bindings
endif()
set(OPENCV_MODULE_${the_module}_HEADERS ${__module_headers} CACHE INTERNAL "List of header files for ${the_module}")
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
@ -1181,6 +1183,9 @@ function(ocv_add_perf_tests)
if(TARGET opencv_videoio_plugins)
add_dependencies(${the_target} opencv_videoio_plugins)
endif()
if(TARGET opencv_highgui_plugins)
add_dependencies(${the_target} opencv_highgui_plugins)
endif()
if(HAVE_HPX)
message("Linking HPX to Perf test of module ${name}")
@ -1276,6 +1281,9 @@ function(ocv_add_accuracy_tests)
if(TARGET opencv_videoio_plugins)
add_dependencies(${the_target} opencv_videoio_plugins)
endif()
if(TARGET opencv_highgui_plugins)
add_dependencies(${the_target} opencv_highgui_plugins)
endif()
if(HAVE_HPX)
message("Linking HPX to Perf test of module ${name}")
@ -1366,6 +1374,9 @@ function(ocv_add_samples)
if(TARGET opencv_videoio_plugins)
add_dependencies(${the_target} opencv_videoio_plugins)
endif()
if(TARGET opencv_highgui_plugins)
add_dependencies(${the_target} opencv_highgui_plugins)
endif()
if(INSTALL_BIN_EXAMPLES)
install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${module_id}" COMPONENT samples)

@ -866,7 +866,9 @@ macro(ocv_check_modules define)
foreach(flag ${${define}_LDFLAGS})
if(flag MATCHES "^-L(.*)")
list(APPEND _libs_paths ${CMAKE_MATCH_1})
elseif(IS_ABSOLUTE "${flag}")
elseif(IS_ABSOLUTE "${flag}"
OR flag STREQUAL "-lstdc++"
)
list(APPEND _libs "${flag}")
elseif(flag MATCHES "^-l(.*)")
set(_lib "${CMAKE_MATCH_1}")
@ -1578,24 +1580,41 @@ endfunction()
function(ocv_add_external_target name inc link def)
if(BUILD_SHARED_LIBS)
if(BUILD_SHARED_LIBS AND link)
set(imp IMPORTED)
endif()
add_library(ocv.3rdparty.${name} INTERFACE ${imp})
set_target_properties(ocv.3rdparty.${name} PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${inc}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${inc}"
INTERFACE_COMPILE_DEFINITIONS "${def}")
# When cmake version is greater than or equal to 3.11, INTERFACE_LINK_LIBRARIES no longer applies to interface library
# See https://github.com/opencv/opencv/pull/18658
if (CMAKE_VERSION VERSION_LESS 3.11)
set_target_properties(ocv.3rdparty.${name} PROPERTIES
INTERFACE_LINK_LIBRARIES "${link}")
else()
target_link_libraries(ocv.3rdparty.${name} INTERFACE ${link})
if(def)
if(NOT (CMAKE_VERSION VERSION_LESS "3.11.0")) # https://gitlab.kitware.com/cmake/cmake/-/merge_requests/1264 : eliminates "Cannot specify compile definitions for imported target" error message
target_compile_definitions(ocv.3rdparty.${name} INTERFACE "${def}")
else()
set_target_properties(ocv.3rdparty.${name} PROPERTIES INTERFACE_COMPILE_DEFINITIONS "${def}")
endif()
endif()
if(inc)
if(NOT (CMAKE_VERSION VERSION_LESS "3.11.0")) # https://gitlab.kitware.com/cmake/cmake/-/merge_requests/1264 : eliminates "Cannot specify compile definitions for imported target" error message
target_include_directories(ocv.3rdparty.${name} SYSTEM INTERFACE "$<BUILD_INTERFACE:${inc}>")
else()
set_target_properties(ocv.3rdparty.${name} PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "$<BUILD_INTERFACE:${inc}>"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "$<BUILD_INTERFACE:${inc}>"
)
endif()
endif()
if(link)
# When cmake version is greater than or equal to 3.11, INTERFACE_LINK_LIBRARIES no longer applies to interface library
# See https://github.com/opencv/opencv/pull/18658
if(CMAKE_VERSION VERSION_LESS 3.11)
set_target_properties(ocv.3rdparty.${name} PROPERTIES
INTERFACE_LINK_LIBRARIES "${link}")
else()
target_link_libraries(ocv.3rdparty.${name} INTERFACE ${link})
endif()
endif()
#
if(NOT BUILD_SHARED_LIBS)
# to install used target only upgrade CMake
if(NOT BUILD_SHARED_LIBS
AND CMAKE_VERSION VERSION_LESS "3.13.0" # https://gitlab.kitware.com/cmake/cmake/-/merge_requests/2152
)
install(TARGETS ocv.3rdparty.${name} EXPORT OpenCVModules)
endif()
endfunction()

@ -28,9 +28,6 @@
/* Clp support */
#cmakedefine HAVE_CLP
/* Cocoa API */
#cmakedefine HAVE_COCOA
/* NVIDIA CUDA Runtime API*/
#cmakedefine HAVE_CUDA
@ -56,12 +53,6 @@
/* Geospatial Data Abstraction Library */
#cmakedefine HAVE_GDAL
/* GTK+ 2.0 Thread support */
#cmakedefine HAVE_GTHREAD
/* GTK+ 2.x toolkit */
#cmakedefine HAVE_GTK
/* Halide support */
#cmakedefine HAVE_HALIDE
@ -121,12 +112,6 @@
/* parallel_for with pthreads */
#cmakedefine HAVE_PTHREADS_PF
/* Qt support */
#cmakedefine HAVE_QT
/* Qt OpenGL support */
#cmakedefine HAVE_QT_OPENGL
/* Intel Threading Building Blocks */
#cmakedefine HAVE_TBB

@ -31,7 +31,7 @@ MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 4
ALIASES += add_toggle{1}="@htmlonly[block] <div class='newInnerHTML'>\1</div><div> <script type="text/javascript"> addToggle(); </script>@endhtmlonly"
ALIASES += add_toggle{1}="@htmlonly[block] <div class='newInnerHTML'>\1</div><div> <script type='text/javascript'> addToggle(); </script>@endhtmlonly"
ALIASES += add_toggle_cpp="@htmlonly[block] <div class='newInnerHTML' title='cpp' style='display: none;'>C++</div><div class='toggleable_div label_cpp' style='display: none;'>@endhtmlonly"
ALIASES += add_toggle_java="@htmlonly[block] <div class='newInnerHTML' title='java' style='display: none;'>Java</div><div class='toggleable_div label_java' style='display: none;'>@endhtmlonly"
ALIASES += add_toggle_python="@htmlonly[block] <div class='newInnerHTML' title='python' style='display: none;'>Python</div><div class='toggleable_div label_python' style='display: none;'>@endhtmlonly"

@ -850,12 +850,12 @@
journal = {IEEE Transactions on Robotics and Automation},
title = {Robot sensor calibration: solving AX=XB on the Euclidean group},
year = {1994},
month = oct,
volume = {10},
number = {5},
pages = {717-721},
doi = {10.1109/70.326576},
ISSN = {1042-296X},
month = {Oct}
issn = {1042-296X}
}
@inproceedings{PM03,
author = {P{\'e}rez, Patrick and Gangnet, Michel and Blake, Andrew},
@ -1051,12 +1051,12 @@
journal = {IEEE Transactions on Robotics and Automation},
title = {A new technique for fully autonomous and efficient 3D robotics hand/eye calibration},
year = {1989},
month = jun,
volume = {5},
number = {3},
pages = {345-358},
doi = {10.1109/70.34770},
ISSN = {1042-296X},
month = {June}
issn = {1042-296X}
}
@inproceedings{UES01,
author = {Uyttendaele, Matthew and Eden, Ashley and Skeliski, R},
@ -1324,3 +1324,13 @@
pages={5551--5560},
year={2017}
}
@article{umeyama1991least,
title={Least-squares estimation of transformation parameters between two point patterns},
author={Umeyama, Shinji},
journal={IEEE Computer Architecture Letters},
volume={13},
number={04},
pages={376--380},
year={1991},
publisher={IEEE Computer Society}
}

@ -40,12 +40,12 @@ using **cv.Sobel()**).
Then comes the main part. After this, they created a score, basically an equation, which
determines if a window can contain a corner or not.
\f[R = det(M) - k(trace(M))^2\f]
\f[R = \det(M) - k(\operatorname{trace}(M))^2\f]
where
- \f$det(M) = \lambda_1 \lambda_2\f$
- \f$trace(M) = \lambda_1 + \lambda_2\f$
- \f$\lambda_1\f$ and \f$\lambda_2\f$ are the eigenvalues of M
- \f$\det(M) = \lambda_1 \lambda_2\f$
- \f$\operatorname{trace}(M) = \lambda_1 + \lambda_2\f$
- \f$\lambda_1\f$ and \f$\lambda_2\f$ are the eigenvalues of \f$M\f$
So the magnitudes of these eigenvalues decide whether a region is a corner, an edge, or flat.

@ -20,7 +20,7 @@ Harris Corner Detector. The scoring function in Harris Corner Detector was given
Instead of this, Shi-Tomasi proposed:
\f[R = min(\lambda_1, \lambda_2)\f]
\f[R = \min(\lambda_1, \lambda_2)\f]
If it is a greater than a threshold value, it is considered as a corner. If we plot it in
\f$\lambda_1 - \lambda_2\f$ space as we did in Harris Corner Detector, we get an image as below:
@ -28,7 +28,7 @@ If it is a greater than a threshold value, it is considered as a corner. If we p
![image](images/shitomasi_space.png)
From the figure, you can see that only when \f$\lambda_1\f$ and \f$\lambda_2\f$ are above a minimum value,
\f$\lambda_{min}\f$, it is considered as a corner(green region).
\f$\lambda_{\min}\f$, it is considered as a corner(green region).
Code
----

@ -156,7 +156,7 @@ sift = cv.SIFT_create()
kp, des = sift.detectAndCompute(gray,None)
@endcode
Here kp will be a list of keypoints and des is a numpy array of shape
\f$Number\_of\_Keypoints \times 128\f$.
\f$\text{(Number of Keypoints)} \times 128\f$.
So we got keypoints, descriptors etc. Now we want to see how to match keypoints in different images.
That we will learn in coming chapters.

@ -107,17 +107,10 @@ def add_signature_to_table(soup, table, signature, language, type):
""" Add a signature to an html table"""
row = soup.new_tag('tr')
row.append(soup.new_tag('td', style='width: 20px;'))
if 'ret' in signature:
row.append(append(soup.new_tag('td'), signature['ret']))
row.append(append(soup.new_tag('td'), '='))
else:
row.append(soup.new_tag('td')) # return values
row.append(soup.new_tag('td')) # '='
row.append(append(soup.new_tag('td'), signature['name'] + '('))
row.append(append(soup.new_tag('td', **{'class': 'paramname'}), signature['arg']))
row.append(append(soup.new_tag('td'), ')'))
row.append(append(soup.new_tag('td'), ') -> '))
row.append(append(soup.new_tag('td'), signature['ret']))
table.append(row)

@ -87,7 +87,7 @@ The tutorial consists of two main programs:
The application starts up extracting the ORB features and descriptors from the input image and
then uses the mesh along with the [Möller–Trumbore intersection
algorithm](http://http://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm/)
algorithm](http://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm/)
to compute the 3D coordinates of the found features. Finally, the 3D points and the descriptors
are stored in different lists in a file with YAML format which each row is a different point. The
technical background on how to store the files can be found in the @ref tutorial_file_input_output_with_xml_yml

@ -396,13 +396,14 @@ There are multiple less popular frameworks which can be used to read and write v
### videoio plugins
Some _videoio_ backends can be built as plugins thus breaking strict dependency on third-party libraries and making them optional at runtime. Following options can be used to control this mechanism:
Since version 4.1.0 some _videoio_ backends can be built as plugins thus breaking strict dependency on third-party libraries and making them optional at runtime. Following options can be used to control this mechanism:
| Option | Default | Description |
| --------| ------ | ------- |
| `VIDEOIO_ENABLE_PLUGINS` | _ON_ | Enable or disable plugins completely. |
| `VIDEOIO_PLUGIN_LIST` | _empty_ | Comma- or semicolon-separated list of backend names to be compiled as plugins. Supported names are _ffmpeg_, _gstreamer_, _msmf_, _mfx_ and _all_. |
| `VIDEOIO_ENABLE_STRICT_PLUGIN_CHECK` | _ON_ | Enable strict runtime version check to only allow plugins built with the same version of OpenCV. |
Check @ref tutorial_general_install for standalone plugins build instructions.
## Parallel processing {#tutorial_config_reference_func_core}
@ -421,6 +422,17 @@ Some of OpenCV algorithms can use multithreading to accelerate processing. OpenC
@note OpenCV can download and build TBB library from GitHub, this functionality can be enabled with the `BUILD_TBB` option.
### Threading plugins
Since version 4.5.2 OpenCV supports dynamically loaded threading backends. At this moment only separate compilation process is supported: first you have to build OpenCV with some _default_ parallel backend (e.g. pthreads), then build each plugin and copy resulting binaries to the _lib_ or _bin_ folder.
| Option | Default | Description |
| ------ | ------- | ----------- |
| PARALLEL_ENABLE_PLUGINS | ON | Enable plugin support, if this option is disabled OpenCV will not try to load anything |
Check @ref tutorial_general_install for standalone plugins build instructions.
## GUI backends (highgui module) {#tutorial_config_reference_highgui}
OpenCV relies on various GUI libraries for window drawing.
@ -442,6 +454,18 @@ OpenCV relies on various GUI libraries for window drawing.
OpenGL integration can be used to draw HW-accelerated windows with following backends: GTK, WIN32 and Qt. And enables basic interoperability with OpenGL, see @ref core_opengl and @ref highgui_opengl for details.
### highgui plugins
Since OpenCV 4.5.3 GTK backend can be build as a dynamically loaded plugin. Following options can be used to control this mechanism:
| Option | Default | Description |
| --------| ------ | ------- |
| `HIGHGUI_ENABLE_PLUGINS` | _ON_ | Enable or disable plugins completely. |
| `HIGHGUI_PLUGIN_LIST` | _empty_ | Comma- or semicolon-separated list of backend names to be compiled as plugins. Supported names are _gtk_, _gtk2_, _gtk3_, and _all_. |
Check @ref tutorial_general_install for standalone plugins build instructions.
## Deep learning neural networks inference backends and options (dnn module) {#tutorial_config_reference_dnn}
OpenCV have own DNN inference module which have own build-in engine, but can also use other libraries for optimized processing. Multiple backends can be enabled in single build. Selection happens at runtime automatically or manually.

@ -105,7 +105,7 @@ cmake --build <build-directory> <build-options>
make
```
## Step 3: Install {#tutorial_general_install_sources_4}
## (optional) Step 3: Install {#tutorial_general_install_sources_4}
During installation procedure build results and other files from build directory will be copied to the install location. Default installation location is `/usr/local` on UNIX and `C:/Program Files` on Windows. This location can be changed at the configuration step by setting `CMAKE_INSTALL_PREFIX` option. To perform installation run the following command:
```
@ -117,3 +117,32 @@ This step is optional, OpenCV can be used directly from the build directory.
@note
If the installation root location is a protected system directory, so the installation process must be run with superuser or administrator privileges (e.g. `sudo cmake ...`).
## (optional) Step 4: Build plugins {#tutorial_general_install_plugins_4}
It is possible to decouple some of OpenCV dependencies and make them optional by extracting parts of the code into dynamically-loaded plugins. It helps to produce adaptive binary distributions which can work on systems with less dependencies and extend functionality just by installing missing libraries. For now modules _core_, _videoio_ and _highgui_ support this mechanism for some of their dependencies. In some cases it is possible to build plugins together with OpenCV by setting options like `VIDEOIO_PLUGIN_LIST` or `HIGHGUI_PLUGIN_LIST`, more options related to this scenario can be found in the @ref tutorial_config_reference. In other cases plugins should be built separately in their own build procedure and this section describes such standalone build process.
@note It is recommended to use compiler, configuration and build options which are compatible to the one used for OpenCV build, otherwise resulting library can refuse to load or cause other runtime problems. Note that some functionality can be limited or work slower when backends are loaded dynamically due to extra barrier between OpenCV and corresponding third-party library.
Build procedure is similar to the main OpenCV build, but you have to use special CMake projects located in corresponding subdirectories, these folders can also contain reference scripts and Docker images. It is important to use `opencv_<module>_<backend>` name prefix for plugins so that loader is able to find them. Each supported prefix can be used to load only one library, however multiple candidates can be probed for a single prefix. For example, you can have _libopencv_videoio_ffmpeg_3.so_ and _libopencv_videoio_ffmpeg_4.so_ plugins and the first one which can be loaded successfully will occupy internal slot and stop probing process. Possible prefixes and project locations are presented in the table below:
| module | backends | location |
| ------ | -------- | -------- |
| core | parallel_tbb, parallel_onetbb, parallel_openmp | _opencv/modules/core/misc/plugins_ |
| highgui | gtk, gtk2, gtk3 | _opencv/modules/highgui/misc/plugins_ |
| videoio | ffmpeg, gstreamer, intel_mfx, msmf | _opencv/modules/videoio/misc_ |
Example:
```.sh
# set-up environment for TBB detection, for example:
# export TBB_DIR=<dir-with-tbb-cmake-config>
cmake -G<generator> \
-DOPENCV_PLUGIN_NAME=opencv_core_tbb_<suffix> \
-DOPENCV_PLUGIN_DESTINATION=<dest-folder> \
-DCMAKE_BUILD_TYPE=<config> \
<opencv>/modules/core/misc/plugins/parallel_tbb
cmake --build . --config <config>
```
@note On Windows plugins must be linked with existing OpenCV build. Set `OpenCV_DIR` environment or CMake variable to the directory with _OpenCVConfig.cmake_ file, it can be OpenCV build directory or some path in the location where you performed installation.

@ -108,7 +108,7 @@ CMake package files will be located in the build root:
## Install
@warning
Installation process only copies files to predefined locations and do minor patching. Library installed using this method is not integrated into the system package registry and can not be uninstalled automatically. We do not recommend system-wide installation to regular users due to possible conflicts with system packages.
The installation process only copies files to predefined locations and does minor patching. Installing using this method does not integrate opencv into the system package registry and thus, for example, opencv can not be uninstalled automatically. We do not recommend system-wide installation to regular users due to possible conflicts with system packages.
By default OpenCV will be installed to the `/usr/local` directory, all files will be copied to following locations:
* `/usr/local/bin` - executable files

@ -1852,6 +1852,33 @@ CV_EXPORTS_W int estimateAffine3D(InputArray src, InputArray dst,
OutputArray out, OutputArray inliers,
double ransacThreshold = 3, double confidence = 0.99);
/** @brief Computes an optimal affine transformation between two 3D point sets.
It computes \f$R,s,t\f$ minimizing \f$\sum{i} dst_i - c \cdot R \cdot src_i \f$
where \f$R\f$ is a 3x3 rotation matrix, \f$t\f$ is a 3x1 translation vector and \f$s\f$ is a
scalar size value. This is an implementation of the algorithm by Umeyama \cite umeyama1991least .
The estimated affine transform has a homogeneous scale which is a subclass of affine
transformations with 7 degrees of freedom. The paired point sets need to comprise at least 3
points each.
@param src First input 3D point set.
@param dst Second input 3D point set.
@param scale If null is passed, the scale parameter c will be assumed to be 1.0.
Else the pointed-to variable will be set to the optimal scale.
@param force_rotation If true, the returned rotation will never be a reflection.
This might be unwanted, e.g. when optimizing a transform between a right- and a
left-handed coordinate system.
@return 3D affine transformation matrix \f$3 \times 4\f$ of the form
\f[T =
\begin{bmatrix}
R & t\\
\end{bmatrix}
\f]
*/
CV_EXPORTS_W cv::Mat estimateAffine3D(InputArray src, InputArray dst,
CV_OUT double* scale = nullptr, bool force_rotation = true);
/** @brief Computes an optimal translation between two 3D point sets.
*
* It computes

@ -899,6 +899,86 @@ int estimateAffine3D(InputArray _from, InputArray _to,
return createRANSACPointSetRegistrator(makePtr<Affine3DEstimatorCallback>(), 4, ransacThreshold, confidence)->run(dFrom, dTo, _out, _inliers);
}
Mat estimateAffine3D(InputArray _from, InputArray _to,
CV_OUT double* _scale, bool force_rotation)
{
CV_INSTRUMENT_REGION();
Mat from = _from.getMat(), to = _to.getMat();
int count = from.checkVector(3);
CV_CheckGE(count, 3, "Umeyama algorithm needs at least 3 points for affine transformation estimation.");
CV_CheckEQ(to.checkVector(3), count, "Point sets need to have the same size");
from = from.reshape(1, count);
to = to.reshape(1, count);
if(from.type() != CV_64F)
from.convertTo(from, CV_64F);
if(to.type() != CV_64F)
to.convertTo(to, CV_64F);
const double one_over_n = 1./count;
const auto colwise_mean = [one_over_n](const Mat& m)
{
Mat my;
reduce(m, my, 0, REDUCE_SUM, CV_64F);
return my * one_over_n;
};
const auto demean = [count](const Mat& A, const Mat& mean)
{
Mat A_centered = Mat::zeros(count, 3, CV_64F);
for(int i = 0; i < count; i++)
{
A_centered.row(i) = A.row(i) - mean;
}
return A_centered;
};
Mat from_mean = colwise_mean(from);
Mat to_mean = colwise_mean(to);
Mat from_centered = demean(from, from_mean);
Mat to_centered = demean(to, to_mean);
Mat cov = to_centered.t() * from_centered * one_over_n;
Mat u,d,vt;
SVD::compute(cov, d, u, vt, SVD::MODIFY_A | SVD::FULL_UV);
CV_CheckGE(countNonZero(d), 2, "Points cannot be colinear");
Mat S = Mat::eye(3, 3, CV_64F);
// det(d) can only ever be >=0, so we can always use this here (compared to the original formula by Umeyama)
if (force_rotation && (determinant(u) * determinant(vt) < 0))
{
S.at<double>(2, 2) = -1;
}
Mat rmat = u*S*vt;
double scale = 1.0;
if (_scale)
{
double var_from = 0.;
scale = 0.;
for(int i = 0; i < 3; i++)
{
var_from += norm(from_centered.col(i), NORM_L2SQR);
scale += d.at<double>(i, 0) * S.at<double>(i, i);
}
double inverse_var = count / var_from;
scale *= inverse_var;
*_scale = scale;
}
Mat new_to = scale * rmat * from_mean.t();
Mat transform;
transform.create(3, 4, CV_64F);
Mat r_part(transform(Rect(0, 0, 3, 3)));
rmat.copyTo(r_part);
transform.col(3) = to_mean.t() - new_to;
return transform;
}
int estimateTranslation3D(InputArray _from, InputArray _to,
OutputArray _out, OutputArray _inliers,
double ransacThreshold, double confidence)

@ -402,7 +402,14 @@ bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints,
Ptr<usac::RansacOutput> ransac_output;
if (usac::run(model_params, imagePoints, objectPoints, model_params->getRandomGeneratorState(),
ransac_output, cameraMatrix, noArray(), distCoeffs, noArray())) {
usac::saveMask(inliers, ransac_output->getInliersMask());
if (inliers.needed()) {
const auto &inliers_mask = ransac_output->getInliersMask();
Mat inliers_;
for (int i = 0; i < (int)inliers_mask.size(); i++)
if (inliers_mask[i])
inliers_.push_back(i);
inliers_.copyTo(inliers);
}
const Mat &model = ransac_output->getModel();
model.col(0).copyTo(rvec);
model.col(1).copyTo(tvec);

@ -408,10 +408,11 @@ int mergePoints (InputArray pts1_, InputArray pts2_, Mat &pts, bool ispnp) {
void saveMask (OutputArray mask, const std::vector<bool> &inliers_mask) {
if (mask.needed()) {
const int points_size = (int) inliers_mask.size();
mask.create(points_size, 1, CV_8U);
auto * maskptr = mask.getMat().ptr<uchar>();
Mat tmp_mask(points_size, 1, CV_8U);
auto * maskptr = tmp_mask.ptr<uchar>();
for (int i = 0; i < points_size; i++)
maskptr[i] = (uchar) inliers_mask[i];
tmp_mask.copyTo(mask);
}
}
void setParameters (Ptr<Model> &params, EstimationMethod estimator, const UsacParams &usac_params,
@ -538,23 +539,26 @@ Mat findEssentialMat (InputArray points1, InputArray points2, InputArray cameraM
bool solvePnPRansac( InputArray objectPoints, InputArray imagePoints,
InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, OutputArray tvec,
bool /*useExtrinsicGuess*/, int max_iters, float thr, double conf,
OutputArray mask, int method) {
OutputArray inliers, int method) {
Ptr<Model> params;
setParameters(method, params, cameraMatrix.empty() ? EstimationMethod ::P6P : EstimationMethod ::P3P,
thr, max_iters, conf, mask.needed());
thr, max_iters, conf, inliers.needed());
Ptr<RansacOutput> ransac_output;
if (run(params, imagePoints, objectPoints, params->getRandomGeneratorState(),
ransac_output, cameraMatrix, noArray(), distCoeffs, noArray())) {
saveMask(mask, ransac_output->getInliersMask());
if (inliers.needed()) {
const auto &inliers_mask = ransac_output->getInliersMask();
Mat inliers_;
for (int i = 0; i < (int)inliers_mask.size(); i++)
if (inliers_mask[i])
inliers_.push_back(i);
inliers_.copyTo(inliers);
}
const Mat &model = ransac_output->getModel();
model.col(0).copyTo(rvec);
model.col(1).copyTo(tvec);
return true;
}
if (mask.needed()){
mask.create(std::max(objectPoints.getMat().rows, objectPoints.getMat().cols), 1, CV_8U);
mask.setTo(Scalar::all(0));
}
return false;
}

@ -41,6 +41,7 @@
//M*/
#include "test_precomp.hpp"
#include "opencv2/core/affine.hpp"
namespace opencv_test { namespace {
@ -201,4 +202,25 @@ TEST(Calib3d_EstimateAffine3D, regression_16007)
EXPECT_EQ(1, res);
}
TEST(Calib3d_EstimateAffine3D, umeyama_3_pt)
{
std::vector<cv::Vec3d> points = {{{0.80549149, 0.8225781, 0.79949521},
{0.28906756, 0.57158557, 0.9864789},
{0.58266182, 0.65474983, 0.25078834}}};
cv::Mat R = (cv::Mat_<double>(3,3) << 0.9689135, -0.0232753, 0.2463025,
0.0236362, 0.9997195, 0.0014915,
-0.2462682, 0.0043765, 0.9691918);
cv::Vec3d t(1., 2., 3.);
cv::Affine3d transform(R, t);
std::vector<cv::Vec3d> transformed_points(points.size());
std::transform(points.begin(), points.end(), transformed_points.begin(), [transform](const cv::Vec3d v){return transform * v;});
double scale;
cv::Mat trafo_est = estimateAffine3D(points, transformed_points, &scale);
Mat R_est(trafo_est(Rect(0, 0, 3, 3)));
EXPECT_LE(cvtest::norm(R_est, R, NORM_INF), 1e-6);
Vec3d t_est = trafo_est.col(3);
EXPECT_LE(cvtest::norm(t_est, t, NORM_INF), 1e-6);
EXPECT_NEAR(scale, 1.0, 1e-6);
}
}} // namespace

@ -345,11 +345,16 @@ TEST(usac_P3P, accuracy) {
log(1 - pow(inl_ratio, 3 /* sample size */));
for (auto flag : flags) {
std::vector<int> inliers;
cv::Mat rvec, tvec, mask, R, P;
CV_Assert(cv::solvePnPRansac(obj_pts, img_pts, K1, cv::noArray(), rvec, tvec,
false, (int)max_iters, (float)thr, conf, mask, flag));
false, (int)max_iters, (float)thr, conf, inliers, flag));
cv::Rodrigues(rvec, R);
cv::hconcat(K1 * R, K1 * tvec, P);
mask.create(pts_size, 1, CV_8U);
mask.setTo(Scalar::all(0));
for (auto inl : inliers)
mask.at<uchar>(inl) = true;
checkInliersMask(TestSolver ::PnP, inl_size, thr, img_pts, obj_pts, P, mask);
}
}
@ -416,19 +421,27 @@ TEST(usac_testUsacParams, accuracy) {
// CV_Error(cv::Error::StsError, "Essential matrix estimation failed!");
}
std::vector<int> inliers(pts_size);
// P3P
inl_size = generatePoints(rng, pts1, pts2, K1, K2, false, pts_size, TestSolver::PnP,
getInlierRatio(usac_params.maxIterations, 3, usac_params.confidence), 0.01, gt_inliers);
CV_Assert(cv::solvePnPRansac(pts2, pts1, K1, dist_coeff, rvec, tvec, mask, usac_params));
CV_Assert(cv::solvePnPRansac(pts2, pts1, K1, dist_coeff, rvec, tvec, inliers, usac_params));
cv::Rodrigues(rvec, R); cv::hconcat(K1 * R, K1 * tvec, model);
mask.create(pts_size, 1, CV_8U);
mask.setTo(Scalar::all(0));
for (auto inl : inliers)
mask.at<uchar>(inl) = true;
checkInliersMask(TestSolver::PnP, inl_size, usac_params.threshold, pts1, pts2, model, mask);
// P6P
inl_size = generatePoints(rng, pts1, pts2, K1, K2, false, pts_size, TestSolver::PnP,
getInlierRatio(usac_params.maxIterations, 6, usac_params.confidence), 0.1, gt_inliers);
cv::Mat K_est;
CV_Assert(cv::solvePnPRansac(pts2, pts1, K_est, dist_coeff, rvec, tvec, mask, usac_params));
CV_Assert(cv::solvePnPRansac(pts2, pts1, K_est, dist_coeff, rvec, tvec, inliers, usac_params));
cv::Rodrigues(rvec, R); cv::hconcat(K_est * R, K_est * tvec, model);
mask.setTo(Scalar::all(0));
for (auto inl : inliers)
mask.at<uchar>(inl) = true;
checkInliersMask(TestSolver::PnP, inl_size, usac_params.threshold, pts1, pts2, model, mask);
// Affine2D

@ -738,8 +738,8 @@ concatenated together.
@param imageSize Size of the image used only to initialize the camera intrinsic matrix.
@param cameraMatrix Input/output 3x3 floating-point camera intrinsic matrix
\f$\cameramatrix{A}\f$ . If @ref CALIB_USE_INTRINSIC_GUESS
and/or @ref CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
initialized before calling the function.
and/or @ref CALIB_FIX_ASPECT_RATIO, @ref CALIB_FIX_PRINCIPAL_POINT or @ref CALIB_FIX_FOCAL_LENGTH
are specified, some or all of fx, fy, cx, cy must be initialized before calling the function.
@param distCoeffs Input/output vector of distortion coefficients
\f$\distcoeffs\f$.
@param rvecs Output vector of rotation vectors (@ref Rodrigues ) estimated for each pattern view
@ -765,7 +765,7 @@ the number of pattern views. \f$R_i, T_i\f$ are concatenated 1x3 vectors.
fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image
center ( imageSize is used), and focal distances are computed in a least-squares fashion.
Note, that if intrinsic parameters are known, there is no need to use this function just to
estimate extrinsic parameters. Use solvePnP instead.
estimate extrinsic parameters. Use @ref solvePnP instead.
- @ref CALIB_FIX_PRINCIPAL_POINT The principal point is not changed during the global
optimization. It stays at the center or at a different location specified when
@ref CALIB_USE_INTRINSIC_GUESS is set too.
@ -775,24 +775,23 @@ ratio fx/fy stays the same as in the input cameraMatrix . When
ignored, only their ratio is computed and used further.
- @ref CALIB_ZERO_TANGENT_DIST Tangential distortion coefficients \f$(p_1, p_2)\f$ are set
to zeros and stay zero.
- @ref CALIB_FIX_FOCAL_LENGTH The focal length is not changed during the global optimization if
@ref CALIB_USE_INTRINSIC_GUESS is set.
- @ref CALIB_FIX_K1,..., @ref CALIB_FIX_K6 The corresponding radial distortion
coefficient is not changed during the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is
set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0.
- @ref CALIB_RATIONAL_MODEL Coefficients k4, k5, and k6 are enabled. To provide the
backward compatibility, this extra flag should be explicitly specified to make the
calibration function use the rational model and return 8 coefficients. If the flag is not
set, the function computes and returns only 5 distortion coefficients.
calibration function use the rational model and return 8 coefficients or more.
- @ref CALIB_THIN_PRISM_MODEL Coefficients s1, s2, s3 and s4 are enabled. To provide the
backward compatibility, this extra flag should be explicitly specified to make the
calibration function use the thin prism model and return 12 coefficients. If the flag is not
set, the function computes and returns only 5 distortion coefficients.
calibration function use the thin prism model and return 12 coefficients or more.
- @ref CALIB_FIX_S1_S2_S3_S4 The thin prism distortion coefficients are not changed during
the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
supplied distCoeffs matrix is used. Otherwise, it is set to 0.
- @ref CALIB_TILTED_MODEL Coefficients tauX and tauY are enabled. To provide the
backward compatibility, this extra flag should be explicitly specified to make the
calibration function use the tilted sensor model and return 14 coefficients. If the flag is not
set, the function computes and returns only 5 distortion coefficients.
calibration function use the tilted sensor model and return 14 coefficients.
- @ref CALIB_FIX_TAUX_TAUY The coefficients of the tilted sensor model are not changed during
the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the
supplied distCoeffs matrix is used. Otherwise, it is set to 0.
@ -817,12 +816,12 @@ The algorithm performs the following steps:
zeros initially unless some of CALIB_FIX_K? are specified.
- Estimate the initial camera pose as if the intrinsic parameters have been already known. This is
done using solvePnP .
done using @ref solvePnP .
- Run the global Levenberg-Marquardt optimization algorithm to minimize the reprojection error,
that is, the total sum of squared distances between the observed feature points imagePoints and
the projected (using the current estimates for camera parameters and the poses) object points
objectPoints. See projectPoints for details.
objectPoints. See @ref projectPoints for details.
@note
If you use a non-square (i.e. non-N-by-N) grid and @ref findChessboardCorners for calibration,

@ -142,6 +142,11 @@
# define CV_NEON 1
#endif
#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
# include<riscv-vector.h>
# define CV_RVV071 1
#endif
#if defined(__ARM_NEON__) || defined(__aarch64__)
# include <arm_neon.h>
#endif
@ -338,6 +343,10 @@ struct VZeroUpperGuard {
# define CV_NEON 0
#endif
#ifndef CV_RVV071
# define CV_RVV071 0
#endif
#ifndef CV_VSX
# define CV_VSX 0
#endif

@ -271,6 +271,8 @@ namespace cv {
#define CV_CPU_MSA 150
#define CV_CPU_RISCVV 170
#define CV_CPU_VSX 200
#define CV_CPU_VSX3 201
@ -325,6 +327,8 @@ enum CpuFeatures {
CPU_MSA = 150,
CPU_RISCVV = 170,
CPU_VSX = 200,
CPU_VSX3 = 201,
@ -681,7 +685,7 @@ __CV_ENUM_FLAGS_BITWISE_XOR_EQ (EnumType, EnumType)
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
#ifdef OPENCV_FORCE_UNSAFE_XADD
CV_INLINE CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
#else
#error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)"
#endif

@ -200,7 +200,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
# undef CV_RVV
#endif
#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif
@ -214,6 +214,10 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#include "opencv2/core/hal/intrin_neon.hpp"
#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
#define CV_SIMD128_CPP 0
#include "opencv2/core/hal/intrin_rvv071.hpp"
#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_vsx.hpp"

@ -538,49 +538,81 @@ inline void v_mul_expand(const v_int8x16& a, const v_int8x16& b,
v_int16x8& c, v_int16x8& d)
{
c.val = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
#if CV_NEON_AARCH64
d.val = vmull_high_s8(a.val, b.val);
#else // #if CV_NEON_AARCH64
d.val = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val));
#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_uint8x16& a, const v_uint8x16& b,
v_uint16x8& c, v_uint16x8& d)
{
c.val = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val));
#if CV_NEON_AARCH64
d.val = vmull_high_u8(a.val, b.val);
#else // #if CV_NEON_AARCH64
d.val = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val));
#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
v_int32x4& c, v_int32x4& d)
{
c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
#if CV_NEON_AARCH64
d.val = vmull_high_s16(a.val, b.val);
#else // #if CV_NEON_AARCH64
d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
v_uint32x4& c, v_uint32x4& d)
{
c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val));
#if CV_NEON_AARCH64
d.val = vmull_high_u16(a.val, b.val);
#else // #if CV_NEON_AARCH64
d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
#endif // #if CV_NEON_AARCH64
}
inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
v_uint64x2& c, v_uint64x2& d)
{
c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val));
#if CV_NEON_AARCH64
d.val = vmull_high_u32(a.val, b.val);
#else // #if CV_NEON_AARCH64
d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
#endif // #if CV_NEON_AARCH64
}
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
return v_int16x8(vcombine_s16(
vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
vshrn_n_s32(vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)), 16)
vshrn_n_s32(
#if CV_NEON_AARCH64
vmull_high_s16(a.val, b.val)
#else // #if CV_NEON_AARCH64
vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val))
#endif // #if CV_NEON_AARCH64
, 16)
));
}
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
return v_uint16x8(vcombine_u16(
vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
vshrn_n_u32(vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)), 16)
vshrn_n_u32(
#if CV_NEON_AARCH64
vmull_high_u16(a.val, b.val)
#else // #if CV_NEON_AARCH64
vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val))
#endif // #if CV_NEON_AARCH64
, 16)
));
}
@ -1254,29 +1286,56 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
inline unsigned v_reduce_sum(const v_uint8x16& a)
{
#if CV_NEON_AARCH64
uint16_t t0 = vaddlvq_u8(a.val);
return t0;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(a.val));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline int v_reduce_sum(const v_int8x16& a)
{
#if CV_NEON_AARCH64
int16_t t0 = vaddlvq_s8(a.val);
return t0;
#else // #if CV_NEON_AARCH64
int32x4_t t0 = vpaddlq_s16(vpaddlq_s8(a.val));
int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0));
return vget_lane_s32(vpadd_s32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sum(const v_uint16x8& a)
{
#if CV_NEON_AARCH64
uint32_t t0 = vaddlvq_u16(a.val);
return t0;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(a.val);
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline int v_reduce_sum(const v_int16x8& a)
{
#if CV_NEON_AARCH64
int32_t t0 = vaddlvq_s16(a.val);
return t0;
#else // #if CV_NEON_AARCH64
int32x4_t t0 = vpaddlq_s16(a.val);
int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0));
return vget_lane_s32(vpadd_s32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
#if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
return v##vectorfunc##vq_##suffix(a.val); \
}
#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
@ -1285,12 +1344,20 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8)
#if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
return v##vectorfunc##vq_##suffix(a.val); \
}
#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
@ -1298,18 +1365,27 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
#if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
return v##vectorfunc##vq_##suffix(a.val); \
}
#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
_Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \
}
#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32)
@ -1322,9 +1398,21 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
inline uint64 v_reduce_sum(const v_uint64x2& a)
{ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); }
{
#if CV_NEON_AARCH64
return vaddvq_u64(a.val);
#else // #if CV_NEON_AARCH64
return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0);
#endif // #if CV_NEON_AARCH64
}
inline int64 v_reduce_sum(const v_int64x2& a)
{ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); }
{
#if CV_NEON_AARCH64
return vaddvq_s64(a.val);
#else // #if CV_NEON_AARCH64
return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0);
#endif // #if CV_NEON_AARCH64
}
#if CV_SIMD128_64F
inline double v_reduce_sum(const v_float64x2& a)
{
@ -1335,6 +1423,11 @@ inline double v_reduce_sum(const v_float64x2& a)
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
const v_float32x4& c, const v_float32x4& d)
{
#if CV_NEON_AARCH64
float32x4_t ab = vpaddq_f32(a.val, b.val); // a0+a1 a2+a3 b0+b1 b2+b3
float32x4_t cd = vpaddq_f32(c.val, d.val); // c0+c1 d0+d1 c2+c3 d2+d3
return v_float32x4(vpaddq_f32(ab, cd)); // sumA sumB sumC sumD
#else // #if CV_NEON_AARCH64
float32x4x2_t ab = vtrnq_f32(a.val, b.val);
float32x4x2_t cd = vtrnq_f32(c.val, d.val);
@ -1345,49 +1438,91 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
float32x4_t v1 = vcombine_f32(vget_high_f32(u0), vget_high_f32(u1));
return v_float32x4(vaddq_f32(v0, v1));
#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
{
#if CV_NEON_AARCH64
uint8x16_t t0 = vabdq_u8(a.val, b.val);
uint16_t t1 = vaddlvq_u8(t0);
return t1;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vabdq_u8(a.val, b.val)));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
{
#if CV_NEON_AARCH64
uint8x16_t t0 = vreinterpretq_u8_s8(vabdq_s8(a.val, b.val));
uint16_t t1 = vaddlvq_u8(t0);
return t1;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s8(vabdq_s8(a.val, b.val))));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{
#if CV_NEON_AARCH64
uint16x8_t t0 = vabdq_u16(a.val, b.val);
uint32_t t1 = vaddlvq_u16(t0);
return t1;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vabdq_u16(a.val, b.val));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)
{
#if CV_NEON_AARCH64
uint16x8_t t0 = vreinterpretq_u16_s16(vabdq_s16(a.val, b.val));
uint32_t t1 = vaddlvq_u16(t0);
return t1;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vpaddlq_u16(vreinterpretq_u16_s16(vabdq_s16(a.val, b.val)));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_uint32x4& a, const v_uint32x4& b)
{
#if CV_NEON_AARCH64
uint32x4_t t0 = vabdq_u32(a.val, b.val);
uint32_t t1 = vaddvq_u32(t0);
return t1;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vabdq_u32(a.val, b.val);
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline unsigned v_reduce_sad(const v_int32x4& a, const v_int32x4& b)
{
#if CV_NEON_AARCH64
uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val));
uint32_t t1 = vaddvq_u32(t0);
return t1;
#else // #if CV_NEON_AARCH64
uint32x4_t t0 = vreinterpretq_u32_s32(vabdq_s32(a.val, b.val));
uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
return vget_lane_u32(vpadd_u32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
{
#if CV_NEON_AARCH64
float32x4_t t0 = vabdq_f32(a.val, b.val);
return vaddvq_f32(t0);
#else // #if CV_NEON_AARCH64
float32x4_t t0 = vabdq_f32(a.val, b.val);
float32x2_t t1 = vpadd_f32(vget_low_f32(t0), vget_high_f32(t0));
return vget_lane_f32(vpadd_f32(t1, t1), 0);
#endif // #if CV_NEON_AARCH64
}
inline v_uint8x16 v_popcount(const v_uint8x16& a)
@ -1409,30 +1544,54 @@ inline v_uint64x2 v_popcount(const v_int64x2& a)
inline int v_signmask(const v_uint8x16& a)
{
#if CV_NEON_AARCH64
const int8x16_t signPosition = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7};
const uint8x16_t byteOrder = {0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15};
uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), signPosition);
uint8x16_t v1 = vqtbl1q_u8(v0, byteOrder);
uint32_t t0 = vaddlvq_u16(vreinterpretq_u16_u8(v1));
return t0;
#else // #if CV_NEON_AARCH64
int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100));
uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0));
uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0)));
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8);
#endif // #if CV_NEON_AARCH64
}
inline int v_signmask(const v_int8x16& a)
{ return v_signmask(v_reinterpret_as_u8(a)); }
inline int v_signmask(const v_uint16x8& a)
{
#if CV_NEON_AARCH64
const int16x8_t signPosition = {0,1,2,3,4,5,6,7};
uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), signPosition);
uint32_t t0 = vaddlvq_u16(v0);
return t0;
#else // #if CV_NEON_AARCH64
int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000));
uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0));
uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0));
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4);
#endif // #if CV_NEON_AARCH64
}
inline int v_signmask(const v_int16x8& a)
{ return v_signmask(v_reinterpret_as_u16(a)); }
inline int v_signmask(const v_uint32x4& a)
{
#if CV_NEON_AARCH64
const int32x4_t signPosition = {0,1,2,3};
uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), signPosition);
uint32_t t0 = vaddvq_u32(v0);
return t0;
#else // #if CV_NEON_AARCH64
int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000));
uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0));
uint64x2_t v1 = vpaddlq_u32(v0);
return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2);
#endif // #if CV_NEON_AARCH64
}
inline int v_signmask(const v_int32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
@ -1440,9 +1599,16 @@ inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
inline int v_signmask(const v_uint64x2& a)
{
#if CV_NEON_AARCH64
const int64x2_t signPosition = {0,1};
uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), signPosition);
uint64_t t0 = vaddvq_u64(v0);
return t0;
#else // #if CV_NEON_AARCH64
int64x1_t m0 = vdup_n_s64(0);
uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));
return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
#endif // #if CV_NEON_AARCH64
}
inline int v_signmask(const v_int64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
@ -1464,19 +1630,31 @@ inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signma
inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); }
#endif
#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
inline bool v_check_all(const v_##_Tpvec& a) \
{ \
_Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
} \
inline bool v_check_any(const v_##_Tpvec& a) \
{ \
_Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
}
#if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
inline bool v_check_all(const v_##_Tpvec& a) \
{ \
return (vminvq_##suffix(a.val) >> shift) != 0; \
} \
inline bool v_check_any(const v_##_Tpvec& a) \
{ \
return (vmaxvq_##suffix(a.val) >> shift) != 0; \
}
#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
inline bool v_check_all(const v_##_Tpvec& a) \
{ \
_Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
} \
inline bool v_check_any(const v_##_Tpvec& a) \
{ \
_Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
}
#endif // #if CV_NEON_AARCH64
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
@ -1829,6 +2007,37 @@ inline v_int32x4 v_trunc(const v_float64x2& a)
}
#endif
#if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
v_##_Tpvec& b0, v_##_Tpvec& b1, \
v_##_Tpvec& b2, v_##_Tpvec& b3) \
{ \
/* -- Pass 1: 64b transpose */ \
_Tpvec##_t t0 = vreinterpretq_##suffix##32_##suffix##64( \
vtrn1q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a0.val), \
vreinterpretq_##suffix##64_##suffix##32(a2.val))); \
_Tpvec##_t t1 = vreinterpretq_##suffix##32_##suffix##64( \
vtrn1q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a1.val), \
vreinterpretq_##suffix##64_##suffix##32(a3.val))); \
_Tpvec##_t t2 = vreinterpretq_##suffix##32_##suffix##64( \
vtrn2q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a0.val), \
vreinterpretq_##suffix##64_##suffix##32(a2.val))); \
_Tpvec##_t t3 = vreinterpretq_##suffix##32_##suffix##64( \
vtrn2q_##suffix##64(vreinterpretq_##suffix##64_##suffix##32(a1.val), \
vreinterpretq_##suffix##64_##suffix##32(a3.val))); \
/* -- Pass 2: 32b transpose */ \
b0.val = vtrn1q_##suffix##32(t0, t1); \
b1.val = vtrn2q_##suffix##32(t0, t1); \
b2.val = vtrn1q_##suffix##32(t2, t3); \
b3.val = vtrn2q_##suffix##32(t2, t3); \
}
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f)
#else // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
@ -1854,6 +2063,7 @@ inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32)
#endif // #if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \

File diff suppressed because it is too large Load Diff

@ -2011,6 +2011,11 @@ public:
template<typename _Tp> MatIterator_<_Tp> begin();
template<typename _Tp> MatConstIterator_<_Tp> begin() const;
/** @brief Same as begin() but for inverse traversal
*/
template<typename _Tp> std::reverse_iterator<MatIterator_<_Tp>> rbegin();
template<typename _Tp> std::reverse_iterator<MatConstIterator_<_Tp>> rbegin() const;
/** @brief Returns the matrix iterator and sets it to the after-last matrix element.
The methods return the matrix read-only or read-write iterators, set to the point following the last
@ -2019,6 +2024,12 @@ public:
template<typename _Tp> MatIterator_<_Tp> end();
template<typename _Tp> MatConstIterator_<_Tp> end() const;
/** @brief Same as end() but for inverse traversal
*/
template<typename _Tp> std::reverse_iterator< MatIterator_<_Tp>> rend();
template<typename _Tp> std::reverse_iterator< MatConstIterator_<_Tp>> rend() const;
/** @brief Runs the given functor over all matrix elements in parallel.
The operation passed as argument has to be a function pointer, a function object or a lambda(C++11).
@ -2250,6 +2261,12 @@ public:
const_iterator begin() const;
const_iterator end() const;
//reverse iterators
std::reverse_iterator<iterator> rbegin();
std::reverse_iterator<iterator> rend();
std::reverse_iterator<const_iterator> rbegin() const;
std::reverse_iterator<const_iterator> rend() const;
//! template methods for for operation over all matrix elements.
// the operations take care of skipping gaps in the end of rows (if any)
template<typename Functor> void forEach(const Functor& operation);

@ -863,6 +863,33 @@ const _Tp* Mat::ptr(const int* idx) const
return (const _Tp*)p;
}
template<int n> inline
uchar* Mat::ptr(const Vec<int, n>& idx)
{
return Mat::ptr(idx.val);
}
template<int n> inline
const uchar* Mat::ptr(const Vec<int, n>& idx) const
{
return Mat::ptr(idx.val);
}
template<typename _Tp, int n> inline
_Tp* Mat::ptr(const Vec<int, n>& idx)
{
CV_DbgAssert( elemSize() == sizeof(_Tp) );
return Mat::ptr<_Tp>(idx.val);
}
template<typename _Tp, int n> inline
const _Tp* Mat::ptr(const Vec<int, n>& idx) const
{
CV_DbgAssert( elemSize() == sizeof(_Tp) );
return Mat::ptr<_Tp>(idx.val);
}
template<typename _Tp> inline
_Tp& Mat::at(int i0, int i1)
{
@ -988,6 +1015,17 @@ MatConstIterator_<_Tp> Mat::begin() const
return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this);
}
template<typename _Tp> inline
std::reverse_iterator<MatConstIterator_<_Tp>> Mat::rbegin() const
{
if (empty())
return std::reverse_iterator<MatConstIterator_<_Tp>>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
it += total();
return std::reverse_iterator<MatConstIterator_<_Tp>> (it);
}
template<typename _Tp> inline
MatConstIterator_<_Tp> Mat::end() const
{
@ -999,6 +1037,15 @@ MatConstIterator_<_Tp> Mat::end() const
return it;
}
template<typename _Tp> inline
std::reverse_iterator<MatConstIterator_<_Tp>> Mat::rend() const
{
if (empty())
return std::reverse_iterator<MatConstIterator_<_Tp>>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
return std::reverse_iterator<MatConstIterator_<_Tp>>((const Mat_<_Tp>*)this);
}
template<typename _Tp> inline
MatIterator_<_Tp> Mat::begin()
{
@ -1008,6 +1055,17 @@ MatIterator_<_Tp> Mat::begin()
return MatIterator_<_Tp>((Mat_<_Tp>*)this);
}
template<typename _Tp> inline
std::reverse_iterator<MatIterator_<_Tp>> Mat::rbegin()
{
if (empty())
return std::reverse_iterator<MatIterator_<_Tp>>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
MatIterator_<_Tp> it((Mat_<_Tp>*)this);
it += total();
return std::reverse_iterator<MatIterator_<_Tp>>(it);
}
template<typename _Tp> inline
MatIterator_<_Tp> Mat::end()
{
@ -1019,6 +1077,15 @@ MatIterator_<_Tp> Mat::end()
return it;
}
template<typename _Tp> inline
std::reverse_iterator<MatIterator_<_Tp>> Mat::rend()
{
if (empty())
return std::reverse_iterator<MatIterator_<_Tp>>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
return std::reverse_iterator<MatIterator_<_Tp>>(MatIterator_<_Tp>((Mat_<_Tp>*)this));
}
template<typename _Tp, typename Functor> inline
void Mat::forEach(const Functor& operation) {
this->forEach_impl<_Tp>(operation);
@ -1686,24 +1753,48 @@ MatConstIterator_<_Tp> Mat_<_Tp>::begin() const
return Mat::begin<_Tp>();
}
template<typename _Tp> inline
std::reverse_iterator<MatConstIterator_<_Tp>> Mat_<_Tp>::rbegin() const
{
return Mat::rbegin<_Tp>();
}
template<typename _Tp> inline
MatConstIterator_<_Tp> Mat_<_Tp>::end() const
{
return Mat::end<_Tp>();
}
template<typename _Tp> inline
std::reverse_iterator<MatConstIterator_<_Tp>> Mat_<_Tp>::rend() const
{
return Mat::rend<_Tp>();
}
template<typename _Tp> inline
MatIterator_<_Tp> Mat_<_Tp>::begin()
{
return Mat::begin<_Tp>();
}
template<typename _Tp> inline
std::reverse_iterator<MatIterator_<_Tp>> Mat_<_Tp>::rbegin()
{
return Mat::rbegin<_Tp>();
}
template<typename _Tp> inline
MatIterator_<_Tp> Mat_<_Tp>::end()
{
return Mat::end<_Tp>();
}
template<typename _Tp> inline
std::reverse_iterator<MatIterator_<_Tp>> Mat_<_Tp>::rend()
{
return Mat::rend<_Tp>();
}
template<typename _Tp> template<typename Functor> inline
void Mat_<_Tp>::forEach(const Functor& operation) {
Mat::forEach<_Tp, Functor>(operation);

@ -43,6 +43,8 @@
#define OPENCV_OPENCL_HPP
#include "opencv2/core.hpp"
#include <typeinfo>
#include <typeindex>
namespace cv { namespace ocl {
@ -277,6 +279,12 @@ public:
/** @returns cl_context value */
void* ptr() const;
/**
* @brief Get OpenCL context property specified on context creation
* @param propertyId Property id (CL_CONTEXT_* as defined in cl_context_properties type)
* @returns Property value if property was specified on clCreateContext, or NULL if context created without the property
*/
void* getOpenCLContextProperty(int propertyId) const;
bool useSVM() const;
void setUseSVM(bool enabled);
@ -290,6 +298,21 @@ public:
void release();
class CV_EXPORTS UserContext {
public:
virtual ~UserContext();
};
template <typename T>
inline void setUserContext(const std::shared_ptr<T>& userContext) {
setUserContext(typeid(T), userContext);
}
template <typename T>
inline std::shared_ptr<T> getUserContext() {
return std::dynamic_pointer_cast<T>(getUserContext(typeid(T)));
}
void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext);
std::shared_ptr<UserContext> getUserContext(std::type_index typeId);
struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
inline bool empty() const { return !p; }

@ -714,24 +714,24 @@ public:
//! the default constructor
CV_WRAP KeyPoint();
/**
@param _pt x & y coordinates of the keypoint
@param _size keypoint diameter
@param _angle keypoint orientation
@param _response keypoint detector response on the keypoint (that is, strength of the keypoint)
@param _octave pyramid octave in which the keypoint has been detected
@param _class_id object id
@param pt x & y coordinates of the keypoint
@param size keypoint diameter
@param angle keypoint orientation
@param response keypoint detector response on the keypoint (that is, strength of the keypoint)
@param octave pyramid octave in which the keypoint has been detected
@param class_id object id
*/
KeyPoint(Point2f _pt, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1);
KeyPoint(Point2f pt, float size, float angle=-1, float response=0, int octave=0, int class_id=-1);
/**
@param x x-coordinate of the keypoint
@param y y-coordinate of the keypoint
@param _size keypoint diameter
@param _angle keypoint orientation
@param _response keypoint detector response on the keypoint (that is, strength of the keypoint)
@param _octave pyramid octave in which the keypoint has been detected
@param _class_id object id
@param size keypoint diameter
@param angle keypoint orientation
@param response keypoint detector response on the keypoint (that is, strength of the keypoint)
@param octave pyramid octave in which the keypoint has been detected
@param class_id object id
*/
CV_WRAP KeyPoint(float x, float y, float _size, float _angle=-1, float _response=0, int _octave=0, int _class_id=-1);
CV_WRAP KeyPoint(float x, float y, float size, float angle=-1, float response=0, int octave=0, int class_id=-1);
size_t hash() const;

@ -80,7 +80,9 @@ LibHandle_t libraryLoad_(const FileSystemPath_t& filename)
return LoadLibraryW(filename.c_str());
#endif
#elif defined(__linux__) || defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__HAIKU__) || defined(__GLIBC__)
return dlopen(filename.c_str(), RTLD_NOW);
void* handle = dlopen(filename.c_str(), RTLD_NOW);
CV_LOG_IF_DEBUG(NULL, !handle, "dlopen() error: " << dlerror());
return handle;
#endif
}

@ -21,7 +21,6 @@ NS_ASSUME_NONNULL_BEGIN
/**
* Represents a two dimensional point the coordinate values of which are of type `int`
*/
NS_SWIFT_NAME(Point)
CV_EXPORTS @interface Point2i : NSObject
# pragma mark - Properties

@ -22,7 +22,6 @@ NS_ASSUME_NONNULL_BEGIN
/**
* Represents a rectange the coordinate and dimension values of which are of type `int`
*/
NS_SWIFT_NAME(Rect)
CV_EXPORTS @interface Rect2i : NSObject
#pragma mark - Properties

@ -21,7 +21,6 @@ NS_ASSUME_NONNULL_BEGIN
/**
* Represents the dimensions of a rectangle the values of which are of type `int`
*/
NS_SWIFT_NAME(Size)
CV_EXPORTS @interface Size2i : NSObject
#pragma mark - Properties

@ -0,0 +1,11 @@
//
// Typealiases.swift
//
// Created by Chris Ballinger on 2020/11/18.
//
import Foundation
public typealias Rect = Rect2i
public typealias Point = Point2i
public typealias Size = Size2i

@ -113,13 +113,13 @@
"objc_type": "Point2i*",
"to_cpp": "%(n)s.nativeRef",
"from_cpp": "[Point2i fromNative:%(n)s]",
"swift_type": "Point"
"swift_type": "Point2i"
},
"Point2i": {
"objc_type": "Point2i*",
"to_cpp": "%(n)s.nativeRef",
"from_cpp": "[Point2i fromNative:%(n)s]",
"swift_type": "Point"
"swift_type": "Point2i"
},
"Point2f": {
"objc_type": "Point2f*",
@ -155,13 +155,13 @@
"objc_type": "Rect2i*",
"to_cpp": "%(n)s.nativeRef",
"from_cpp": "[Rect2i fromNative:%(n)s]",
"swift_type": "Rect"
"swift_type": "Rect2i"
},
"Rect2i": {
"objc_type": "Rect2i*",
"to_cpp": "%(n)s.nativeRef",
"from_cpp": "[Rect2i fromNative:%(n)s]",
"swift_type": "Rect"
"swift_type": "Rect2i"
},
"Rect2f": {
"objc_type": "Rect2f*",
@ -187,13 +187,13 @@
"objc_type": "Size2i*",
"to_cpp": "%(n)s.nativeRef",
"from_cpp": "[Size2i fromNative:%(n)s]",
"swift_type": "Size"
"swift_type": "Size2i"
},
"Size2i": {
"objc_type": "Size2i*",
"to_cpp": "%(n)s.nativeRef",
"from_cpp": "[Size2i fromNative:%(n)s]",
"swift_type": "Size"
"swift_type": "Size2i"
},
"Size2f": {
"objc_type": "Size2f*",
@ -275,7 +275,7 @@
"vector_Point": {
"objc_type": "Point2i*",
"v_type": "Point2i",
"swift_type": "[Point]"
"swift_type": "[Point2i]"
},
"vector_Point2f": {
"objc_type": "Point2f*",
@ -300,7 +300,7 @@
"vector_Rect": {
"objc_type": "Rect2i*",
"v_type": "Rect2i",
"swift_type": "[Rect]"
"swift_type": "[Rect2i]"
},
"vector_Rect2d": {
"objc_type": "Rect2d*",
@ -388,7 +388,7 @@
"vector_vector_Point": {
"objc_type": "Point2i*",
"v_v_type": "Point2i",
"swift_type": "[[Point]]"
"swift_type": "[[Point2i]]"
},
"vector_vector_Point2f": {
"objc_type": "Point2f*",

@ -1910,4 +1910,4 @@ DEFINE_SIMD_ALL(recip, recip_loop)
#define SIMD_GUARD
#endif
}} // cv::hal::
}} // cv::hal::

File diff suppressed because it is too large Load Diff

@ -1,23 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_CORE_SRC_DIRECTX_HPP
#define OPENCV_CORE_SRC_DIRECTX_HPP
#ifndef HAVE_DIRECTX
#error Invalid build configuration
#endif
namespace cv {
namespace directx {
namespace internal {
struct OpenCLDirectXImpl;
OpenCLDirectXImpl* createDirectXImpl();
void deleteDirectXImpl(OpenCLDirectXImpl**);
OpenCLDirectXImpl* getDirectXImpl(ocl::Context& ctx);
}}} // namespace internal
#endif // OPENCV_CORE_SRC_DIRECTX_HPP

@ -43,7 +43,9 @@
#include "precomp.hpp"
#include "opencv2/core/utils/filesystem.hpp"
#include "opencv2/core/utils/filesystem.private.hpp"
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
#if defined _WIN32 || defined WINCE
# include <windows.h>
const char dir_separators[] = "/\\";
@ -131,12 +133,15 @@ namespace
}
#else
#else // defined _WIN32 || defined WINCE
# include <dirent.h>
# include <sys/stat.h>
const char dir_separators[] = "/";
#endif
#endif // defined _WIN32 || defined WINCE
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
static bool isDir(const cv::String& path, DIR* dir)
{
#if defined _WIN32 || defined _WIN32_WCE
@ -168,13 +173,20 @@ static bool isDir(const cv::String& path, DIR* dir)
return is_dir != 0;
#endif
}
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
bool cv::utils::fs::isDirectory(const cv::String& path)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_INSTRUMENT_REGION();
return isDir(path, NULL);
#else
CV_UNUSED(path);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif
}
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
static bool wildcmp(const char *string, const char *wild)
{
// Based on wildcmp written by Jack Handy - <A href="mailto:jakkhandy@hotmail.com">jakkhandy@hotmail.com</A>
@ -267,9 +279,11 @@ static void glob_rec(const cv::String& directory, const cv::String& wildchart, s
CV_Error_(CV_StsObjectNotFound, ("could not open directory: %s", directory.c_str()));
}
}
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
void cv::glob(String pattern, std::vector<String>& result, bool recursive)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_INSTRUMENT_REGION();
result.clear();
@ -303,20 +317,44 @@ void cv::glob(String pattern, std::vector<String>& result, bool recursive)
glob_rec(path, wildchart, result, recursive, false, path);
std::sort(result.begin(), result.end());
#else // OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_UNUSED(pattern);
CV_UNUSED(result);
CV_UNUSED(recursive);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
}
void cv::utils::fs::glob(const cv::String& directory, const cv::String& pattern,
std::vector<cv::String>& result,
bool recursive, bool includeDirectories)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
glob_rec(directory, pattern, result, recursive, includeDirectories, directory);
std::sort(result.begin(), result.end());
#else // OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_UNUSED(directory);
CV_UNUSED(pattern);
CV_UNUSED(result);
CV_UNUSED(recursive);
CV_UNUSED(includeDirectories);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
}
void cv::utils::fs::glob_relative(const cv::String& directory, const cv::String& pattern,
std::vector<cv::String>& result,
bool recursive, bool includeDirectories)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
glob_rec(directory, pattern, result, recursive, includeDirectories, cv::String());
std::sort(result.begin(), result.end());
#else // OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_UNUSED(directory);
CV_UNUSED(pattern);
CV_UNUSED(result);
CV_UNUSED(recursive);
CV_UNUSED(includeDirectories);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
}

@ -113,10 +113,6 @@
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#ifdef HAVE_DIRECTX
#include "directx.hpp"
#endif
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
#include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
@ -2367,9 +2363,6 @@ protected:
, contextId(CV_XADD(&g_contextId, 1))
, configuration(configuration_)
, handle(0)
#ifdef HAVE_DIRECTX
, p_directx_impl(0)
#endif
#ifdef HAVE_OPENCL_SVM
, svmInitialized(false)
#endif
@ -2395,11 +2388,10 @@ protected:
handle = NULL;
}
devices.clear();
#ifdef HAVE_DIRECTX
directx::internal::deleteDirectXImpl(&p_directx_impl);
#endif
}
userContextStorage.clear();
{
cv::AutoLock lock(cv::getInitializationMutex());
auto& container = getGlobalContainer();
@ -2705,18 +2697,20 @@ public:
return *bufferPoolHostPtr_.get();
}
#ifdef HAVE_DIRECTX
directx::internal::OpenCLDirectXImpl* p_directx_impl;
directx::internal::OpenCLDirectXImpl* getDirectXImpl()
{
if (!p_directx_impl)
{
p_directx_impl = directx::internal::createDirectXImpl();
}
return p_directx_impl;
std::map<std::type_index, std::shared_ptr<UserContext>> userContextStorage;
cv::Mutex userContextMutex;
void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext) {
cv::AutoLock lock(userContextMutex);
userContextStorage[typeId] = userContext;
}
std::shared_ptr<UserContext> getUserContext(std::type_index typeId) {
cv::AutoLock lock(userContextMutex);
auto it = userContextStorage.find(typeId);
if (it != userContextStorage.end())
return it->second;
else
return nullptr;
}
#endif
#ifdef HAVE_OPENCL_SVM
bool svmInitialized;
@ -3036,6 +3030,25 @@ Context Context::create(const std::string& configuration)
return ctx;
}
void* Context::getOpenCLContextProperty(int propertyId) const
{
if (p == NULL)
return nullptr;
::size_t size = 0;
CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, 0, NULL, &size));
std::vector<cl_context_properties> prop(size / sizeof(cl_context_properties), (cl_context_properties)0);
CV_OCL_CHECK(clGetContextInfo(p->handle, CL_CONTEXT_PROPERTIES, size, prop.data(), NULL));
for (size_t i = 0; i < prop.size(); i += 2)
{
if (prop[i] == (cl_context_properties)propertyId)
{
CV_LOG_DEBUG(NULL, "OpenCL: found context property=" << propertyId << ") => " << (void*)prop[i + 1]);
return (void*)prop[i + 1];
}
}
return nullptr;
}
#ifdef HAVE_OPENCL_SVM
bool Context::useSVM() const
{
@ -3097,6 +3110,21 @@ CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
} // namespace cv::ocl::svm
#endif // HAVE_OPENCL_SVM
Context::UserContext::~UserContext()
{
}
void Context::setUserContext(std::type_index typeId, const std::shared_ptr<Context::UserContext>& userContext)
{
CV_Assert(p);
p->setUserContext(typeId, userContext);
}
std::shared_ptr<Context::UserContext> Context::getUserContext(std::type_index typeId)
{
CV_Assert(p);
return p->getUserContext(typeId);
}
static void get_platform_name(cl_platform_id id, String& name)
{
@ -3454,7 +3482,6 @@ struct Kernel::Impl
void registerImageArgument(int arg, const Image2D& image)
{
CV_CheckGE(arg, 0, "");
CV_CheckLT(arg, (int)MAX_ARRS, "");
if (arg < (int)shadow_images.size() && shadow_images[arg].ptr() != image.ptr()) // TODO future: replace ptr => impl (more strong check)
{
CV_Check(arg, !isInProgress, "ocl::Kernel: clearing of pending Image2D arguments is not allowed");
@ -7505,15 +7532,4 @@ uint64 Timer::durationNS() const
}} // namespace
#ifdef HAVE_DIRECTX
namespace cv { namespace directx { namespace internal {
OpenCLDirectXImpl* getDirectXImpl(ocl::Context& ctx)
{
ocl::Context::Impl* i = ctx.getImpl();
CV_Assert(i);
return i->getDirectXImpl();
}
}}} // namespace cv::directx::internal
#endif
#endif // HAVE_OPENCL

@ -172,9 +172,16 @@ Context& Context::getDefault(bool initialize)
}
void* Context::ptr() const { return NULL; }
void* Context::getOpenCLContextProperty(int /*propertyId*/) const { OCL_NOT_AVAILABLE(); }
bool Context::useSVM() const { return false; }
void Context::setUseSVM(bool enabled) { }
Context::UserContext::~UserContext() { }
void Context::setUserContext(std::type_index /*typeId*/, const std::shared_ptr<Context::UserContext>& /*userContext*/) { OCL_NOT_AVAILABLE(); }
std::shared_ptr<Context::UserContext> Context::getUserContext(std::type_index /*typeId*/) { OCL_NOT_AVAILABLE(); }
/* static */ Context Context::fromHandle(void* context) { OCL_NOT_AVAILABLE(); }
/* static */ Context Context::fromDevice(const ocl::Device& device) { OCL_NOT_AVAILABLE(); }
/* static */ Context Context::create(const std::string& configuration) { OCL_NOT_AVAILABLE(); }

@ -375,6 +375,8 @@ cv::Mutex& getInitializationMutex();
#define CV_SINGLETON_LAZY_INIT(TYPE, INITIALIZER) CV_SINGLETON_LAZY_INIT_(TYPE, INITIALIZER, instance)
#define CV_SINGLETON_LAZY_INIT_REF(TYPE, INITIALIZER) CV_SINGLETON_LAZY_INIT_(TYPE, INITIALIZER, *instance)
CV_EXPORTS void releaseTlsStorageThread();
int cv_snprintf(char* buf, int len, const char* fmt, ...);
int cv_vsnprintf(char* buf, int len, const char* fmt, va_list args);
}

@ -53,6 +53,8 @@
#include <opencv2/core/utils/tls.hpp>
#include <opencv2/core/utils/instrumentation.hpp>
#include <opencv2/core/utils/filesystem.private.hpp>
namespace cv {
static void _initSystem()
@ -393,6 +395,7 @@ struct HWFeatures
g_hwFeatureNames[CPU_VSX3] = "VSX3";
g_hwFeatureNames[CPU_MSA] = "CPU_MSA";
g_hwFeatureNames[CPU_RISCVV] = "RISCVV";
g_hwFeatureNames[CPU_AVX512_COMMON] = "AVX512-COMMON";
g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX";
@ -588,6 +591,9 @@ struct HWFeatures
#if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800)
have[CV_CPU_NEON] = true;
#endif
#ifdef __riscv_vector
have[CV_CPU_RISCVV] = true;
#endif
#ifdef __mips_msa
have[CV_CPU_MSA] = true;
#endif
@ -947,6 +953,7 @@ String format( const char* fmt, ... )
String tempfile( const char* suffix )
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
String fname;
#ifndef NO_GETENV
const char *temp_dir = getenv("OPENCV_TEMP_PATH");
@ -1033,6 +1040,10 @@ String tempfile( const char* suffix )
return fname + suffix;
}
return fname;
#else // OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_UNUSED(suffix);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
}
static ErrorCallback customErrorCallback = 0;
@ -1468,6 +1479,9 @@ struct ThreadData
size_t idx; // Thread index in TLS storage. This is not OS thread ID!
};
static bool g_isTlsStorageInitialized = false;
// Main TLS storage class
class TlsStorage
{
@ -1477,6 +1491,7 @@ public:
{
tlsSlots.reserve(32);
threads.reserve(32);
g_isTlsStorageInitialized = true;
}
~TlsStorage()
{
@ -1681,12 +1696,31 @@ static TlsStorage &getTlsStorage()
#ifndef _WIN32 // pthread key destructor
static void opencv_tls_destructor(void* pData)
{
if (!g_isTlsStorageInitialized)
return; // nothing to release, so prefer to avoid creation of new global structures
getTlsStorage().releaseThread(pData);
}
#else // _WIN32
#ifdef CV_USE_FLS
static void WINAPI opencv_fls_destructor(void* pData)
{
// Empiric detection of ExitProcess call
DWORD code = STILL_ACTIVE/*259*/;
BOOL res = GetExitCodeProcess(GetCurrentProcess(), &code);
if (res && code != STILL_ACTIVE)
{
// Looks like we are in ExitProcess() call
// This is FLS specific only because their callback is called before DllMain.
// TLS doesn't have similar problem, DllMain() is called first which mark __termination properly.
// Note: this workaround conflicts with ExitProcess() steps order described in documentation, however it works:
// 3. ... called with DLL_PROCESS_DETACH
// 7. The termination status of the process changes from STILL_ACTIVE to the exit value of the process.
// (ref: https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-exitprocess)
cv::__termination = true;
}
if (!g_isTlsStorageInitialized)
return; // nothing to release, so prefer to avoid creation of new global structures
getTlsStorage().releaseThread(pData);
}
#endif // CV_USE_FLS
@ -1695,6 +1729,13 @@ static void WINAPI opencv_fls_destructor(void* pData)
} // namespace details
using namespace details;
void releaseTlsStorageThread()
{
if (!g_isTlsStorageInitialized)
return; // nothing to release, so prefer to avoid creation of new global structures
getTlsStorage().releaseThread();
}
TLSDataContainer::TLSDataContainer()
{
key_ = (int)getTlsStorage().reserveSlot(this); // Reserve key from TLS storage
@ -1778,7 +1819,7 @@ BOOL WINAPI DllMain(HINSTANCE, DWORD fdwReason, LPVOID lpReserved)
{
// Not allowed to free resources if lpReserved is non-null
// http://msdn.microsoft.com/en-us/library/windows/desktop/ms682583.aspx
cv::getTlsStorage().releaseThread();
releaseTlsStorageThread();
}
}
return TRUE;

@ -16,6 +16,7 @@
#include "opencv2/core/utils/filesystem.hpp"
#include <opencv2/core/utils/configuration.private.hpp>
#include "opencv2/core/utils/filesystem.private.hpp"
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
@ -67,6 +68,7 @@ CV_EXPORTS void addDataSearchSubDirectory(const cv::String& subdir)
_getDataSearchSubDirectory().push_back(subdir);
}
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
static bool isPathSep(char c)
{
return c == '/' || c == '\\';
@ -96,12 +98,14 @@ static bool isSubDirectory_(const cv::String& base_path, const cv::String& path)
}
return true;
}
static bool isSubDirectory(const cv::String& base_path, const cv::String& path)
{
bool res = isSubDirectory_(base_path, path);
CV_LOG_VERBOSE(NULL, 0, "isSubDirectory(): base: " << base_path << " path: " << path << " => result: " << (res ? "TRUE" : "FALSE"));
return res;
}
#endif //OPENCV_HAVE_FILESYSTEM_SUPPORT
static cv::String getModuleLocation(const void* addr)
{
@ -188,6 +192,7 @@ cv::String findDataFile(const cv::String& relative_path,
const std::vector<String>* search_paths,
const std::vector<String>* subdir_paths)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
configuration_parameter = configuration_parameter ? configuration_parameter : "OPENCV_DATA_PATH";
CV_LOG_DEBUG(NULL, cv::format("utils::findDataFile('%s', %s)", relative_path.c_str(), configuration_parameter));
@ -410,10 +415,18 @@ cv::String findDataFile(const cv::String& relative_path,
#endif
return cv::String(); // not found
#else // OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_UNUSED(relative_path);
CV_UNUSED(configuration_parameter);
CV_UNUSED(search_paths);
CV_UNUSED(subdir_paths);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
}
cv::String findDataFile(const cv::String& relative_path, bool required, const char* configuration_parameter)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_LOG_DEBUG(NULL, cv::format("cv::utils::findDataFile('%s', %s, %s)",
relative_path.c_str(), required ? "true" : "false",
configuration_parameter ? configuration_parameter : "NULL"));
@ -424,6 +437,12 @@ cv::String findDataFile(const cv::String& relative_path, bool required, const ch
if (result.empty() && required)
CV_Error(cv::Error::StsError, cv::format("OpenCV: Can't find required data file: %s", relative_path.c_str()));
return result;
#else // OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_UNUSED(relative_path);
CV_UNUSED(required);
CV_UNUSED(configuration_parameter);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
}
}} // namespace

@ -11,6 +11,7 @@
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
#include "opencv2/core/utils/logger.hpp"
#include "opencv2/core/utils/filesystem.hpp"
#include "opencv2/core/utils/filesystem.private.hpp"
namespace cv { namespace samples {
@ -49,6 +50,7 @@ CV_EXPORTS void addSamplesDataSearchSubDirectory(const cv::String& subdir)
cv::String findFile(const cv::String& relative_path, bool required, bool silentMode)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
CV_LOG_DEBUG(NULL, cv::format("cv::samples::findFile('%s', %s)", relative_path.c_str(), required ? "true" : "false"));
cv::String result = cv::utils::findDataFile(relative_path,
"OPENCV_SAMPLES_DATA_PATH",
@ -61,6 +63,12 @@ cv::String findFile(const cv::String& relative_path, bool required, bool silentM
if (result.empty() && required)
CV_Error(cv::Error::StsError, cv::format("OpenCV samples: Can't find required data file: %s", relative_path.c_str()));
return result;
#else
CV_UNUSED(relative_path);
CV_UNUSED(required);
CV_UNUSED(silentMode);
CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!");
#endif
}

@ -7,6 +7,8 @@
#include "precomp.hpp"
#include <opencv2/core/utils/logger.hpp>
#ifdef HAVE_VA
# include <va/va.h>
#else // HAVE_VA
@ -48,12 +50,28 @@ namespace cv { namespace va_intel {
#ifdef HAVE_VA_INTEL
static clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn clGetDeviceIDsFromVA_APIMediaAdapterINTEL = NULL;
static clCreateFromVA_APIMediaSurfaceINTEL_fn clCreateFromVA_APIMediaSurfaceINTEL = NULL;
static clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn clEnqueueAcquireVA_APIMediaSurfacesINTEL = NULL;
static clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn clEnqueueReleaseVA_APIMediaSurfacesINTEL = NULL;
static bool contextInitialized = false;
class VAAPIInterop : public ocl::Context::UserContext
{
public:
VAAPIInterop(cl_platform_id platform) {
clCreateFromVA_APIMediaSurfaceINTEL = (clCreateFromVA_APIMediaSurfaceINTEL_fn)
clGetExtensionFunctionAddressForPlatform(platform, "clCreateFromVA_APIMediaSurfaceINTEL");
clEnqueueAcquireVA_APIMediaSurfacesINTEL = (clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)
clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueAcquireVA_APIMediaSurfacesINTEL");
clEnqueueReleaseVA_APIMediaSurfacesINTEL = (clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)
clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueReleaseVA_APIMediaSurfacesINTEL");
if (!clCreateFromVA_APIMediaSurfaceINTEL ||
!clEnqueueAcquireVA_APIMediaSurfacesINTEL ||
!clEnqueueReleaseVA_APIMediaSurfacesINTEL) {
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get extension function for VA-API interop");
}
}
virtual ~VAAPIInterop() {
}
clCreateFromVA_APIMediaSurfaceINTEL_fn clCreateFromVA_APIMediaSurfaceINTEL;
clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn clEnqueueAcquireVA_APIMediaSurfacesINTEL;
clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn clEnqueueReleaseVA_APIMediaSurfacesINTEL;
};
#endif // HAVE_VA_INTEL
@ -65,10 +83,8 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
#if !defined(HAVE_VA)
NO_VA_SUPPORT_ERROR;
#else // !HAVE_VA
init_libva();
# ifdef HAVE_VA_INTEL
contextInitialized = false;
if (tryInterop)
{
cl_uint numPlatforms;
@ -97,20 +113,10 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
for (int i = 0; i < (int)numPlatforms; ++i)
{
// Get extension function pointers
clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn clGetDeviceIDsFromVA_APIMediaAdapterINTEL;
clGetDeviceIDsFromVA_APIMediaAdapterINTEL = (clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)
clGetExtensionFunctionAddressForPlatform(platforms[i], "clGetDeviceIDsFromVA_APIMediaAdapterINTEL");
clCreateFromVA_APIMediaSurfaceINTEL = (clCreateFromVA_APIMediaSurfaceINTEL_fn)
clGetExtensionFunctionAddressForPlatform(platforms[i], "clCreateFromVA_APIMediaSurfaceINTEL");
clEnqueueAcquireVA_APIMediaSurfacesINTEL = (clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)
clGetExtensionFunctionAddressForPlatform(platforms[i], "clEnqueueAcquireVA_APIMediaSurfacesINTEL");
clEnqueueReleaseVA_APIMediaSurfacesINTEL = (clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)
clGetExtensionFunctionAddressForPlatform(platforms[i], "clEnqueueReleaseVA_APIMediaSurfacesINTEL");
if (((void*)clGetDeviceIDsFromVA_APIMediaAdapterINTEL == NULL) ||
((void*)clCreateFromVA_APIMediaSurfaceINTEL == NULL) ||
((void*)clEnqueueAcquireVA_APIMediaSurfacesINTEL == NULL) ||
((void*)clEnqueueReleaseVA_APIMediaSurfacesINTEL == NULL))
if ((void*)clGetDeviceIDsFromVA_APIMediaAdapterINTEL == NULL)
{
continue;
}
@ -151,8 +157,6 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
if (found >= 0)
{
contextInitialized = true;
cl_platform_id platform = platforms[found];
std::string platformName = PlatformInfo(&platform).name();
@ -160,6 +164,7 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
try
{
clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
clExecCtx.getContext().setUserContext(std::make_shared<VAAPIInterop>(platform));
}
catch (...)
{
@ -520,7 +525,6 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
#if !defined(HAVE_VA)
NO_VA_SUPPORT_ERROR;
#else // !HAVE_VA
init_libva();
const int stype = CV_8UC3;
@ -531,7 +535,18 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
CV_Assert(srcSize.width == size.width && srcSize.height == size.height);
#ifdef HAVE_VA_INTEL
if (contextInitialized)
ocl::OpenCLExecutionContext& ocl_context = ocl::OpenCLExecutionContext::getCurrent();
VAAPIInterop* interop = ocl_context.getContext().getUserContext<VAAPIInterop>().get();
CV_LOG_IF_DEBUG(NULL, !interop,
"OpenCL/VA_INTEL: Can't interop with current OpenCL context - missing VAAPIInterop API. "
"OpenCL context should be created through initializeContextFromVA()");
void* context_display = ocl_context.getContext().getOpenCLContextProperty(CL_CONTEXT_VA_API_DISPLAY_INTEL);
CV_LOG_IF_INFO(NULL, interop && !context_display,
"OpenCL/VA_INTEL: Can't interop with current OpenCL context - missing VA display, context re-creation is required");
bool isValidContextDisplay = (display == context_display);
CV_LOG_IF_INFO(NULL, interop && context_display && !isValidContextDisplay,
"OpenCL/VA_INTEL: Can't interop with current OpenCL context - VA display mismatch: " << context_display << "(context) vs " << (void*)display << "(surface)");
if (isValidContextDisplay && interop)
{
UMat u = src.getUMat();
@ -541,28 +556,26 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
cl_mem clBuffer = (cl_mem)u.handle(ACCESS_READ);
using namespace cv::ocl;
Context& ctx = Context::getDefault();
cl_context context = (cl_context)ctx.ptr();
cl_context context = (cl_context)ocl_context.getContext().ptr();
cl_int status = 0;
cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 0, &status);
cl_mem clImageY = interop->clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 0, &status);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 1, &status);
cl_mem clImageUV = interop->clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_WRITE_ONLY, &surface, 1, &status);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
cl_command_queue q = (cl_command_queue)ocl_context.getQueue().ptr();
cl_mem images[2] = { clImageY, clImageUV };
status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
status = interop->clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
if (!ocl::ocl_convert_bgr_to_nv12(clBuffer, (int)u.step[0], u.cols, u.rows, clImageY, clImageUV))
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_bgr_to_nv12 failed");
clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
interop->clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");
@ -580,6 +593,7 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
else
# endif // HAVE_VA_INTEL
{
init_libva();
Mat m = src.getMat();
// TODO Add support for roi
@ -626,7 +640,6 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out
#if !defined(HAVE_VA)
NO_VA_SUPPORT_ERROR;
#else // !HAVE_VA
init_libva();
const int dtype = CV_8UC3;
@ -634,7 +647,9 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out
dst.create(size, dtype);
#ifdef HAVE_VA_INTEL
if (contextInitialized)
ocl::OpenCLExecutionContext& ocl_context = ocl::OpenCLExecutionContext::getCurrent();
VAAPIInterop* interop = ocl_context.getContext().getUserContext<VAAPIInterop>().get();
if (display == ocl_context.getContext().getOpenCLContextProperty(CL_CONTEXT_VA_API_DISPLAY_INTEL) && interop)
{
UMat u = dst.getUMat();
@ -644,28 +659,26 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out
cl_mem clBuffer = (cl_mem)u.handle(ACCESS_WRITE);
using namespace cv::ocl;
Context& ctx = Context::getDefault();
cl_context context = (cl_context)ctx.ptr();
cl_context context = (cl_context)ocl_context.getContext().ptr();
cl_int status = 0;
cl_mem clImageY = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 0, &status);
cl_mem clImageY = interop->clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 0, &status);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (Y plane)");
cl_mem clImageUV = clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 1, &status);
cl_mem clImageUV = interop->clCreateFromVA_APIMediaSurfaceINTEL(context, CL_MEM_READ_ONLY, &surface, 1, &status);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clCreateFromVA_APIMediaSurfaceINTEL failed (UV plane)");
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
cl_command_queue q = (cl_command_queue)ocl_context.getQueue().ptr();
cl_mem images[2] = { clImageY, clImageUV };
status = clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
status = interop->clEnqueueAcquireVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueAcquireVA_APIMediaSurfacesINTEL failed");
if (!ocl::ocl_convert_nv12_to_bgr(clImageY, clImageUV, clBuffer, (int)u.step[0], u.cols, u.rows))
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: ocl_convert_nv12_to_bgr failed");
status = clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
status = interop->clEnqueueReleaseVA_APIMediaSurfacesINTEL(q, 2, images, 0, NULL, NULL);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLApiCallError, "OpenCL: clEnqueueReleaseVA_APIMediaSurfacesINTEL failed");
@ -683,6 +696,7 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out
else
# endif // HAVE_VA_INTEL
{
init_libva();
Mat m = dst.getMat();
// TODO Add support for roi

@ -132,6 +132,73 @@ TEST(OpenCL, support_SPIR_programs)
testOpenCLKernel(k);
}
TEST(OpenCL, image2Dcount_regression_19334)
{
cv::ocl::Context ctx = cv::ocl::Context::getDefault();
if (!ctx.ptr())
{
throw cvtest::SkipTestException("OpenCL is not available");
}
cv::ocl::Device device = cv::ocl::Device::getDefault();
if (!device.compilerAvailable())
{
throw cvtest::SkipTestException("OpenCL compiler is not available");
}
std::string module_name; // empty to disable OpenCL cache
static const char* opencl_kernel_src =
"__kernel void test_kernel(int a,\n"
" __global const uchar* src0, int src0_step, int src0_offset, int src0_rows, int src0_cols,\n"
" __global const uchar* src1, int src1_step, int src1_offset, int src1_rows, int src1_cols,\n"
" __global const uchar* src2, int src2_step, int src2_offset, int src2_rows, int src2_cols,\n"
" __read_only image2d_t image)\n"
"{\n"
"}";
cv::ocl::ProgramSource src(module_name, "test_opencl_image_arg", opencl_kernel_src, "");
cv::String errmsg;
cv::ocl::Program program(src, "", errmsg);
ASSERT_TRUE(program.ptr() != NULL);
cv::ocl::Kernel k("test_kernel", program);
ASSERT_FALSE(k.empty());
std::vector<UMat> images(4);
for (size_t i = 0; i < images.size(); ++i)
images[i] = UMat(10, 10, CV_8UC1);
cv::ocl::Image2D image;
try
{
cv::ocl::Image2D image_(images.back());
image = image_;
}
catch (const cv::Exception&)
{
throw cvtest::SkipTestException("OpenCL images are not supported");
}
int nargs = 0;
int a = 0;
nargs = k.set(nargs, a);
ASSERT_EQ(1, nargs);
nargs = k.set(nargs, images[0]);
ASSERT_EQ(6, nargs);
nargs = k.set(nargs, images[1]);
ASSERT_EQ(11, nargs);
nargs = k.set(nargs, images[2]);
ASSERT_EQ(16, nargs);
// do not throw (issue of #19334)
ASSERT_NO_THROW(nargs = k.set(nargs, image));
ASSERT_EQ(17, nargs);
// allow to replace image argument if kernel is not running
UMat image2(10, 10, CV_8UC1);
ASSERT_NO_THROW(nargs = k.set(16, cv::ocl::Image2D(image2)));
ASSERT_EQ(17, nargs);
}
TEST(OpenCL, move_construct_assign)
{
cv::ocl::Context ctx1 = cv::ocl::Context::getDefault();

@ -577,6 +577,25 @@ template<typename R> struct TheTest
return *this;
}
TheTest & test_mul_hi()
{
// typedef typename V_RegTraits<R>::w_reg Rx2;
Data<R> dataA, dataB(32767);
R a = dataA, b = dataB;
R c = v_mul_hi(a, b);
Data<R> resC = c;
const int n = R::nlanes / 2;
for (int i = 0; i < n; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ((typename R::lane_type)((dataA[i] * dataB[i]) >> 16), resC[i]);
}
return *this;
}
TheTest & test_abs()
{
typedef typename V_RegTraits<R>::u_reg Ru;
@ -1663,6 +1682,7 @@ void test_hal_intrin_uint16()
.test_arithm_wrap()
.test_mul()
.test_mul_expand()
.test_mul_hi()
.test_cmp()
.test_shift<1>()
.test_shift<8>()
@ -1697,6 +1717,7 @@ void test_hal_intrin_int16()
.test_arithm_wrap()
.test_mul()
.test_mul_expand()
.test_mul_hi()
.test_cmp()
.test_shift<1>()
.test_shift<8>()

@ -2355,4 +2355,98 @@ TEST(Mat, regression_18473)
}
TEST(Mat, ptrVecni_20044)
{
Mat_<int> m(3,4); m << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12;
Vec2i idx(1,1);
uchar *u = m.ptr(idx);
EXPECT_EQ(int(6), *(int*)(u));
const uchar *cu = m.ptr(idx);
EXPECT_EQ(int(6), *(int*)(cu));
int *i = m.ptr<int>(idx);
EXPECT_EQ(int(6), *(i));
const int *ci = m.ptr<int>(idx);
EXPECT_EQ(int(6), *(ci));
}
TEST(Mat, reverse_iterator_19967)
{
// empty iterator (#16855)
cv::Mat m_empty;
EXPECT_NO_THROW(m_empty.rbegin<uchar>());
EXPECT_NO_THROW(m_empty.rend<uchar>());
EXPECT_TRUE(m_empty.rbegin<uchar>() == m_empty.rend<uchar>());
// 1D test
std::vector<uchar> data{0, 1, 2, 3};
const std::vector<int> sizes_1d{4};
//Base class
cv::Mat m_1d(sizes_1d, CV_8U, data.data());
auto mismatch_it_pair_1d = std::mismatch(data.rbegin(), data.rend(), m_1d.rbegin<uchar>());
EXPECT_EQ(mismatch_it_pair_1d.first, data.rend()); // expect no mismatch
EXPECT_EQ(mismatch_it_pair_1d.second, m_1d.rend<uchar>());
//Templated derived class
cv::Mat_<uchar> m_1d_t(static_cast<int>(sizes_1d.size()), sizes_1d.data(), data.data());
auto mismatch_it_pair_1d_t = std::mismatch(data.rbegin(), data.rend(), m_1d_t.rbegin());
EXPECT_EQ(mismatch_it_pair_1d_t.first, data.rend()); // expect no mismatch
EXPECT_EQ(mismatch_it_pair_1d_t.second, m_1d_t.rend());
// 2D test
const std::vector<int> sizes_2d{2, 2};
//Base class
cv::Mat m_2d(sizes_2d, CV_8U, data.data());
auto mismatch_it_pair_2d = std::mismatch(data.rbegin(), data.rend(), m_2d.rbegin<uchar>());
EXPECT_EQ(mismatch_it_pair_2d.first, data.rend());
EXPECT_EQ(mismatch_it_pair_2d.second, m_2d.rend<uchar>());
//Templated derived class
cv::Mat_<uchar> m_2d_t(static_cast<int>(sizes_2d.size()),sizes_2d.data(), data.data());
auto mismatch_it_pair_2d_t = std::mismatch(data.rbegin(), data.rend(), m_2d_t.rbegin());
EXPECT_EQ(mismatch_it_pair_2d_t.first, data.rend());
EXPECT_EQ(mismatch_it_pair_2d_t.second, m_2d_t.rend());
// 3D test
std::vector<uchar> data_3d{0, 1, 2, 3, 4, 5, 6, 7};
const std::vector<int> sizes_3d{2, 2, 2};
//Base class
cv::Mat m_3d(sizes_3d, CV_8U, data_3d.data());
auto mismatch_it_pair_3d = std::mismatch(data_3d.rbegin(), data_3d.rend(), m_3d.rbegin<uchar>());
EXPECT_EQ(mismatch_it_pair_3d.first, data_3d.rend());
EXPECT_EQ(mismatch_it_pair_3d.second, m_3d.rend<uchar>());
//Templated derived class
cv::Mat_<uchar> m_3d_t(static_cast<int>(sizes_3d.size()),sizes_3d.data(), data_3d.data());
auto mismatch_it_pair_3d_t = std::mismatch(data_3d.rbegin(), data_3d.rend(), m_3d_t.rbegin());
EXPECT_EQ(mismatch_it_pair_3d_t.first, data_3d.rend());
EXPECT_EQ(mismatch_it_pair_3d_t.second, m_3d_t.rend());
// const test base class
const cv::Mat m_1d_const(sizes_1d, CV_8U, data.data());
auto mismatch_it_pair_1d_const = std::mismatch(data.rbegin(), data.rend(), m_1d_const.rbegin<uchar>());
EXPECT_EQ(mismatch_it_pair_1d_const.first, data.rend()); // expect no mismatch
EXPECT_EQ(mismatch_it_pair_1d_const.second, m_1d_const.rend<uchar>());
EXPECT_FALSE((std::is_assignable<decltype(m_1d_const.rend<uchar>()), uchar>::value)) << "Constness of const iterator violated.";
EXPECT_FALSE((std::is_assignable<decltype(m_1d_const.rbegin<uchar>()), uchar>::value)) << "Constness of const iterator violated.";
// const test templated dervied class
const cv::Mat_<uchar> m_1d_const_t(static_cast<int>(sizes_1d.size()), sizes_1d.data(), data.data());
auto mismatch_it_pair_1d_const_t = std::mismatch(data.rbegin(), data.rend(), m_1d_const_t.rbegin());
EXPECT_EQ(mismatch_it_pair_1d_const_t.first, data.rend()); // expect no mismatch
EXPECT_EQ(mismatch_it_pair_1d_const_t.second, m_1d_const_t.rend());
EXPECT_FALSE((std::is_assignable<decltype(m_1d_const_t.rend()), uchar>::value)) << "Constness of const iterator violated.";
EXPECT_FALSE((std::is_assignable<decltype(m_1d_const_t.rbegin()), uchar>::value)) << "Constness of const iterator violated.";
}
}} // namespace

@ -9,6 +9,7 @@
#include "opencv2/core/utils/buffer_area.private.hpp"
#include "test_utils_tls.impl.hpp"
#include "opencv2/core/utils/filesystem.private.hpp"
namespace opencv_test { namespace {
@ -336,7 +337,7 @@ TEST(Logger, DISABLED_message_if)
}
}
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
TEST(Samples, findFile)
{
cv::utils::logging::LogLevel prev = cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_VERBOSE);
@ -353,6 +354,7 @@ TEST(Samples, findFile_missing)
ASSERT_ANY_THROW(path = samples::findFile("non-existed.file", true));
cv::utils::logging::setLogLevel(prev);
}
#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
template <typename T>
inline bool buffers_overlap(T * first, size_t first_num, T * second, size_t second_num)

@ -176,16 +176,20 @@ ocv_add_perf_tests(${INF_ENGINE_TARGET}
FILES Include ${perf_hdrs}
)
ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
ocv_option(${the_module}_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)
ocv_option(OPENCV_DNN_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
ocv_option(OPENCV_DNN_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)
if(BUILD_PERF_TESTS)
if (${the_module}_PERF_CAFFE)
if (OPENCV_DNN_PERF_CAFFE
OR ${the_module}_PERF_CAFFE # compatibility for deprecated option
)
find_package(Caffe QUIET)
if (Caffe_FOUND)
add_definitions(-DHAVE_CAFFE=1)
ocv_target_link_libraries(opencv_perf_dnn caffe)
endif()
elseif(${the_module}_PERF_CLCAFFE)
elseif(OPENCV_DNN_PERF_CLCAFFE
OR ${the_module}_PERF_CAFFE # compatibility for deprecated option
)
find_package(Caffe QUIET)
if (Caffe_FOUND)
add_definitions(-DHAVE_CLCAFFE=1)

@ -738,9 +738,11 @@ CV__DNN_INLINE_NS_BEGIN
CV_WRAP void enableFusion(bool fusion);
/** @brief Returns overall time for inference and timings (in ticks) for layers.
*
* Indexes in returned vector correspond to layers ids. Some layers can be fused with others,
* in this case zero ticks count will be return for that skipped layers.
* @param timings vector for tick timings for all layers.
* in this case zero ticks count will be return for that skipped layers. Supported by DNN_BACKEND_OPENCV on DNN_TARGET_CPU only.
*
* @param[out] timings vector for tick timings for all layers.
* @return overall ticks for model inference.
*/
CV_WRAP int64 getPerfProfile(CV_OUT std::vector<double>& timings);

@ -20,6 +20,9 @@
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>
#include "opencv2/core/utils/filesystem.hpp"
#include "opencv2/core/utils/filesystem.private.hpp"
namespace cv { namespace dnn {
#ifdef HAVE_DNN_NGRAPH
@ -683,6 +686,23 @@ void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net)
ie.SetConfig({{
InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, format("%d", getNumThreads()),
}}, device_name);
#endif
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2021_2)
if (device_name.find("GPU") == 0)
{
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
std::string cache_path = utils::fs::getCacheDirectory((std::string("dnn_ie_cache_") + device_name).c_str(), "OPENCV_DNN_IE_GPU_CACHE_DIR");
#else
std::string cache_path = utils::getConfigurationParameterString("OPENCV_DNN_IE_GPU_CACHE_DIR", "");
#endif
if (!cache_path.empty() && cache_path != "disabled")
{
CV_LOG_INFO(NULL, "OpenCV/nGraph: using GPU kernels cache: " << cache_path);
ie.SetConfig({{
InferenceEngine::PluginConfigParams::KEY_CACHE_DIR, cache_path,
}}, device_name);
}
}
#endif
}
std::map<std::string, std::string> config;

@ -133,7 +133,8 @@ public:
auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
auto rois = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
std::vector<size_t> dims = rois->get_shape(), offsets(4, 0);
auto rois_shape = rois->get_shape();
std::vector<int64_t> dims(rois_shape.begin(), rois_shape.end()), offsets(4, 0);
offsets[3] = 2;
dims[3] = 7;
@ -147,7 +148,7 @@ public:
lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
// Reshape rois from 4D to 2D
std::vector<size_t> shapeData = {dims[2], 5};
std::vector<int64_t> shapeData = {dims[2], 5};
auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, shapeData.data());
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(slice, shape, true);

@ -61,25 +61,11 @@ easily switch between different algorithms solving the same problem. This sectio
matching descriptors that are represented as vectors in a multidimensional space. All objects that
implement vector descriptor matchers inherit the DescriptorMatcher interface.
@note
- An example explaining keypoint matching can be found at
opencv_source_code/samples/cpp/descriptor_extractor_matcher.cpp
- An example on descriptor matching evaluation can be found at
opencv_source_code/samples/cpp/detector_descriptor_matcher_evaluation.cpp
- An example on one to many image matching can be found at
opencv_source_code/samples/cpp/matching_to_many_images.cpp
@defgroup features2d_draw Drawing Function of Keypoints and Matches
@defgroup features2d_category Object Categorization
This section describes approaches based on local 2D features and used to categorize objects.
@note
- A complete Bag-Of-Words sample can be found at
opencv_source_code/samples/cpp/bagofwords_classification.cpp
- (Python) An example using the features2D framework to perform object categorization can be
found at opencv_source_code/samples/python/find_obj.py
@defgroup feature2d_hal Hardware Acceleration Layer
@{
@defgroup features2d_hal_interface Interface
@ -90,7 +76,7 @@ This section describes approaches based on local 2D features and used to categor
namespace cv
{
//! @addtogroup features2d
//! @addtogroup features2d_main
//! @{
// //! writes vector of keypoints to the file storage
@ -237,9 +223,6 @@ the vector descriptor extractors inherit the DescriptorExtractor interface.
*/
typedef Feature2D DescriptorExtractor;
//! @addtogroup features2d_main
//! @{
/** @brief Class for implementing the wrapper which makes detectors and extractors to be affine invariant,
described as ASIFT in @cite YM11 .
@ -486,20 +469,20 @@ class CV_EXPORTS_W MSER : public Feature2D
public:
/** @brief Full constructor for %MSER detector
@param _delta it compares \f$(size_{i}-size_{i-delta})/size_{i-delta}\f$
@param _min_area prune the area which smaller than minArea
@param _max_area prune the area which bigger than maxArea
@param _max_variation prune the area have similar size to its children
@param _min_diversity for color image, trace back to cut off mser with diversity less than min_diversity
@param _max_evolution for color image, the evolution steps
@param _area_threshold for color image, the area threshold to cause re-initialize
@param _min_margin for color image, ignore too small margin
@param _edge_blur_size for color image, the aperture size for edge blur
@param delta it compares \f$(size_{i}-size_{i-delta})/size_{i-delta}\f$
@param min_area prune the area which smaller than minArea
@param max_area prune the area which bigger than maxArea
@param max_variation prune the area have similar size to its children
@param min_diversity for color image, trace back to cut off mser with diversity less than min_diversity
@param max_evolution for color image, the evolution steps
@param area_threshold for color image, the area threshold to cause re-initialize
@param min_margin for color image, ignore too small margin
@param edge_blur_size for color image, the aperture size for edge blur
*/
CV_WRAP static Ptr<MSER> create( int _delta=5, int _min_area=60, int _max_area=14400,
double _max_variation=0.25, double _min_diversity=.2,
int _max_evolution=200, double _area_threshold=1.01,
double _min_margin=0.003, int _edge_blur_size=5 );
CV_WRAP static Ptr<MSER> create( int delta=5, int min_area=60, int max_area=14400,
double max_variation=0.25, double min_diversity=.2,
int max_evolution=200, double area_threshold=1.01,
double min_margin=0.003, int edge_blur_size=5 );
/** @brief Detect %MSER regions

@ -167,17 +167,15 @@ class SinglePolicy
public:
static base_any_policy* get_policy();
private:
static typename choose_policy<T>::type policy;
};
template <typename T>
typename choose_policy<T>::type SinglePolicy<T>::policy;
/// This function will return a different policy for each type.
template <typename T>
inline base_any_policy* SinglePolicy<T>::get_policy() { return &policy; }
inline base_any_policy* SinglePolicy<T>::get_policy()
{
static typename choose_policy<T>::type policy;
return &policy;
}
} // namespace anyimpl

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save