Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/18842/head
Alexander Alekhin 4 years ago
commit ce8027c6fb
  1. 4
      3rdparty/libjpeg-turbo/CMakeLists.txt
  2. 2
      3rdparty/libjpeg-turbo/LICENSE.md
  3. 22
      3rdparty/libjpeg-turbo/README.ijg
  4. 21
      3rdparty/libjpeg-turbo/README.md
  5. 4
      3rdparty/libjpeg-turbo/src/jchuff.c
  6. 5
      3rdparty/libjpeg-turbo/src/jcinit.c
  7. 4
      3rdparty/libjpeg-turbo/src/jcphuff.c
  8. 5
      3rdparty/libjpeg-turbo/src/jctrans.c
  9. 45
      3rdparty/libjpeg-turbo/src/jdapistd.c
  10. 8
      3rdparty/libjpeg-turbo/src/jdcoefct.c
  11. 9
      3rdparty/libjpeg-turbo/src/jdcolor.c
  12. 55
      3rdparty/libjpeg-turbo/src/jdmerge.c
  13. 47
      3rdparty/libjpeg-turbo/src/jdmerge.h
  14. 10
      3rdparty/libjpeg-turbo/src/jdmrg565.c
  15. 6
      3rdparty/libjpeg-turbo/src/jdmrgext.c
  16. 5
      3rdparty/libjpeg-turbo/src/jdtrans.c
  17. 4
      3rdparty/libjpeg-turbo/src/jfdctint.c
  18. 4
      3rdparty/libjpeg-turbo/src/jidctint.c
  19. 8
      3rdparty/libjpeg-turbo/src/jmorecfg.h
  20. 3
      3rdparty/libjpeg-turbo/src/jpegcomp.h
  21. 8
      3rdparty/libjpeg-turbo/src/jpeglib.h
  22. 6
      3rdparty/libjpeg-turbo/src/jquant2.c
  23. 14
      3rdparty/libjpeg-turbo/src/jversion.h
  24. 2
      apps/interactive-calibration/parametersController.cpp
  25. 2
      doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown
  26. 5
      modules/core/include/opencv2/core/cv_cpu_dispatch.h
  27. 4
      modules/core/include/opencv2/core/cvdef.h
  28. 21
      modules/core/include/opencv2/core/hal/intrin_avx.hpp
  29. 5
      modules/core/src/convert.simd.hpp
  30. 8
      modules/core/test/test_intrin.cpp
  31. 6
      modules/core/test/test_intrin_utils.hpp
  32. 3
      modules/dnn/include/opencv2/dnn/dnn.hpp
  33. 55
      modules/dnn/src/dnn.cpp
  34. 49
      modules/dnn/src/layers/pooling_layer.cpp
  35. 51
      modules/dnn/src/layers/reshape_layer.cpp
  36. 13
      modules/dnn/src/layers/slice_layer.cpp
  37. 47
      modules/dnn/src/onnx/onnx_graph_simplifier.cpp
  38. 109
      modules/dnn/src/onnx/onnx_importer.cpp
  39. 39
      modules/dnn/test/test_onnx_importer.cpp

@ -4,9 +4,9 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-6
set(VERSION_MAJOR 2)
set(VERSION_MINOR 0)
set(VERSION_REVISION 5)
set(VERSION_REVISION 6)
set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
set(LIBJPEG_TURBO_VERSION_NUMBER 2000005)
set(LIBJPEG_TURBO_VERSION_NUMBER 2000006)
string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
if(CMAKE_BUILD_TYPE STREQUAL "Debug")

@ -91,7 +91,7 @@ best of our understanding.
The Modified (3-clause) BSD License
===================================
Copyright (C)2009-2019 D. R. Commander. All Rights Reserved.
Copyright (C)2009-2020 D. R. Commander. All Rights Reserved.
Copyright (C)2015 Viktor Szathmáry. All Rights Reserved.
Redistribution and use in source and binary forms, with or without

@ -223,12 +223,12 @@ https://www.iso.org/standard/54989.html and http://www.itu.int/rec/T-REC-T.871.
A PDF file of the older JFIF 1.02 specification is available at
http://www.w3.org/Graphics/JPEG/jfif3.pdf.
The TIFF 6.0 file format specification can be obtained by FTP from
ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme
found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
(Compression tag 7). Copies of this Note can be obtained from
The TIFF 6.0 file format specification can be obtained from
http://mirrors.ctan.org/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation
scheme found in the TIFF 6.0 spec of 3-June-92 has a number of serious
problems. IJG does not recommend use of the TIFF 6.0 design (TIFF Compression
tag 6). Instead, we recommend the JPEG design proposed by TIFF Technical Note
#2 (Compression tag 7). Copies of this Note can be obtained from
http://www.ijg.org/files/. It is expected that the next revision
of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
Although IJG's own code does not support TIFF/JPEG, the free libtiff library
@ -243,14 +243,8 @@ The most recent released version can always be found there in
directory "files".
The JPEG FAQ (Frequently Asked Questions) article is a source of some
general information about JPEG.
It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
and other news.answers archive sites, including the official news.answers
archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu
with body
send usenet/news.answers/jpeg-faq/part1
send usenet/news.answers/jpeg-faq/part2
general information about JPEG. It is available at
http://www.faqs.org/faqs/jpeg-faq.
FILE FORMAT COMPATIBILITY

@ -2,7 +2,7 @@ Background
==========
libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and
MIPS systems, as well as progressive JPEG compression on x86 and x86-64
systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
all else being equal. On other types of systems, libjpeg-turbo can still
@ -179,8 +179,8 @@ supported and which aren't.
NOTE: As of this writing, extensive research has been conducted into the
usefulness of DCT scaling as a means of data reduction and SmartScale as a
means of quality improvement. The reader is invited to peruse the research at
<http://www.libjpeg-turbo.org/About/SmartScale> and draw his/her own conclusions,
means of quality improvement. Readers are invited to peruse the research at
<http://www.libjpeg-turbo.org/About/SmartScale> and draw their own conclusions,
but it is the general belief of our project that these features have not
demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.
@ -287,12 +287,13 @@ following reasons:
(and slightly faster) floating point IDCT algorithm introduced in libjpeg
v8a as opposed to the algorithm used in libjpeg v6b. It should be noted,
however, that this algorithm basically brings the accuracy of the floating
point IDCT in line with the accuracy of the slow integer IDCT. The floating
point DCT/IDCT algorithms are mainly a legacy feature, and they do not
produce significantly more accuracy than the slow integer algorithms (to put
numbers on this, the typical difference in PSNR between the two algorithms
is less than 0.10 dB, whereas changing the quality level by 1 in the upper
range of the quality scale is typically more like a 1.0 dB difference.)
point IDCT in line with the accuracy of the accurate integer IDCT. The
floating point DCT/IDCT algorithms are mainly a legacy feature, and they do
not produce significantly more accuracy than the accurate integer algorithms
(to put numbers on this, the typical difference in PSNR between the two
algorithms is less than 0.10 dB, whereas changing the quality level by 1 in
the upper range of the quality scale is typically more like a 1.0 dB
difference.)
- If the floating point algorithms in libjpeg-turbo are not implemented using
SIMD instructions on a particular platform, then the accuracy of the
@ -340,7 +341,7 @@ The algorithm used by the SIMD-accelerated quantization function cannot produce
correct results whenever the fast integer forward DCT is used along with a JPEG
quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization
function in those cases. This causes performance to drop by as much as 40%.
It is therefore strongly advised that you use the slow integer forward DCT
It is therefore strongly advised that you use the accurate integer forward DCT
whenever encoding images with a JPEG quality of 98 or higher.

@ -34,10 +34,10 @@
* memory footprint by 64k, which is important for some mobile applications
* that create many isolated instances of libjpeg-turbo (web browsers, for
* instance.) This may improve performance on some mobile platforms as well.
* This feature is enabled by default only on ARM processors, because some x86
* This feature is enabled by default only on Arm processors, because some x86
* chips have a slow implementation of bsr, and the use of clz/bsr cannot be
* shown to have a significant performance impact even on the x86 chips that
* have a fast implementation of it. When building for ARMv6, you can
* have a fast implementation of it. When building for Armv6, you can
* explicitly disable the use of clz/bsr by adding -mthumb to the compiler
* flags (this defines __thumb__).
*/

@ -1,8 +1,10 @@
/*
* jcinit.c
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* libjpeg-turbo Modifications:
* Copyright (C) 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -19,6 +21,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/*

@ -43,10 +43,10 @@
* memory footprint by 64k, which is important for some mobile applications
* that create many isolated instances of libjpeg-turbo (web browsers, for
* instance.) This may improve performance on some mobile platforms as well.
* This feature is enabled by default only on ARM processors, because some x86
* This feature is enabled by default only on Arm processors, because some x86
* chips have a slow implementation of bsr, and the use of clz/bsr cannot be
* shown to have a significant performance impact even on the x86 chips that
* have a fast implementation of it. When building for ARMv6, you can
* have a fast implementation of it. When building for Armv6, you can
* explicitly disable the use of clz/bsr by adding -mthumb to the compiler
* flags (this defines __thumb__).
*/

@ -4,8 +4,8 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1995-1998, Thomas G. Lane.
* Modified 2000-2009 by Guido Vollbeding.
* It was modified by The libjpeg-turbo Project to include only code relevant
* to libjpeg-turbo.
* libjpeg-turbo Modifications:
* Copyright (C) 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -17,6 +17,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/* Forward declarations */

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1994-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2010, 2015-2018, D. R. Commander.
* Copyright (C) 2010, 2015-2018, 2020, D. R. Commander.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@ -21,6 +21,8 @@
#include "jinclude.h"
#include "jdmainct.h"
#include "jdcoefct.h"
#include "jdmaster.h"
#include "jdmerge.h"
#include "jdsample.h"
#include "jmemsys.h"
@ -316,6 +318,8 @@ LOCAL(void)
read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
{
JDIMENSION n;
my_master_ptr master = (my_master_ptr)cinfo->master;
JSAMPARRAY scanlines = NULL;
void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION input_row, JSAMPARRAY output_buf,
int num_rows) = NULL;
@ -332,8 +336,13 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
cinfo->cquantize->color_quantize = noop_quantize;
}
if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
scanlines = &upsample->spare_row;
}
for (n = 0; n < num_lines; n++)
jpeg_read_scanlines(cinfo, NULL, 1);
jpeg_read_scanlines(cinfo, scanlines, 1);
if (color_convert)
cinfo->cconvert->color_convert = color_convert;
@ -353,6 +362,12 @@ increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows)
{
JDIMENSION rows_left;
my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
my_master_ptr master = (my_master_ptr)cinfo->master;
if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
read_and_discard_scanlines(cinfo, rows);
return;
}
/* Increment the counter to the next row group after the skipped rows. */
main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor;
@ -382,21 +397,27 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
{
my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
my_master_ptr master = (my_master_ptr)cinfo->master;
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
JDIMENSION i, x;
int y;
JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row;
JDIMENSION lines_to_skip, lines_to_read;
/* Two-pass color quantization is not supported. */
if (cinfo->quantize_colors && cinfo->two_pass_quantize)
ERREXIT(cinfo, JERR_NOTIMPL);
if (cinfo->global_state != DSTATE_SCANNING)
ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
/* Do not skip past the bottom of the image. */
if (cinfo->output_scanline + num_lines >= cinfo->output_height) {
num_lines = cinfo->output_height - cinfo->output_scanline;
cinfo->output_scanline = cinfo->output_height;
(*cinfo->inputctl->finish_input_pass) (cinfo);
cinfo->inputctl->eoi_reached = TRUE;
return cinfo->output_height - cinfo->output_scanline;
return num_lines;
}
if (num_lines == 0)
@ -445,8 +466,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
main_ptr->buffer_full = FALSE;
main_ptr->rowgroup_ctr = 0;
main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
upsample->next_row_out = cinfo->max_v_samp_factor;
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
if (!master->using_merged_upsample) {
upsample->next_row_out = cinfo->max_v_samp_factor;
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
}
}
/* Skipping is much simpler when context rows are not required. */
@ -458,8 +481,10 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
cinfo->output_scanline += lines_left_in_iMCU_row;
main_ptr->buffer_full = FALSE;
main_ptr->rowgroup_ctr = 0;
upsample->next_row_out = cinfo->max_v_samp_factor;
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
if (!master->using_merged_upsample) {
upsample->next_row_out = cinfo->max_v_samp_factor;
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
}
}
}
@ -494,7 +519,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
increment_simple_rowgroup_ctr(cinfo, lines_to_read);
}
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
if (!master->using_merged_upsample)
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
return num_lines;
}
@ -535,7 +561,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
* bit odd, since "rows_to_go" seems to be redundantly keeping track of
* output_scanline.
*/
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
if (!master->using_merged_upsample)
upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
/* Always skip the requested number of lines. */
return num_lines;

@ -6,7 +6,7 @@
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2010, 2015-2016, D. R. Commander.
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, 2020, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -495,11 +495,13 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
if (first_row && block_row == 0)
prev_block_row = buffer_ptr;
else
prev_block_row = buffer[block_row - 1];
prev_block_row = buffer[block_row - 1] +
cinfo->master->first_MCU_col[ci];
if (last_row && block_row == block_rows - 1)
next_block_row = buffer_ptr;
else
next_block_row = buffer[block_row + 1];
next_block_row = buffer[block_row + 1] +
cinfo->master->first_MCU_col[ci];
/* We fetch the surrounding DC values using a sliding-register approach.
* Initialize all nine here so as to do the right thing on narrow pics.
*/

@ -571,11 +571,10 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
* RGB565 conversion
*/
#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \
(((g) << 3) & 0x7E0) | ((b) >> 3))
#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \
(((g) << 11) & 0xE000) | \
(((b) << 5) & 0x1F00))
#define PACK_SHORT_565_LE(r, g, b) \
((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
#define PACK_SHORT_565_BE(r, g, b) \
(((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l)
#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r)

@ -5,7 +5,7 @@
* Copyright (C) 1994-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009, 2011, 2014-2015, D. R. Commander.
* Copyright (C) 2009, 2011, 2014-2015, 2020, D. R. Commander.
* Copyright (C) 2013, Linaro Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@ -40,41 +40,13 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jdmerge.h"
#include "jsimd.h"
#include "jconfigint.h"
#ifdef UPSAMPLE_MERGING_SUPPORTED
/* Private subobject */
typedef struct {
struct jpeg_upsampler pub; /* public fields */
/* Pointer to routine to do actual upsampling/conversion of one row group */
void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
/* Private state for YCC->RGB conversion */
int *Cr_r_tab; /* => table for Cr to R conversion */
int *Cb_b_tab; /* => table for Cb to B conversion */
JLONG *Cr_g_tab; /* => table for Cr to G conversion */
JLONG *Cb_g_tab; /* => table for Cb to G conversion */
/* For 2:1 vertical sampling, we produce two output rows at a time.
* We need a "spare" row buffer to hold the second output row if the
* application provides just a one-row buffer; we also use the spare
* to discard the dummy last row if the image height is odd.
*/
JSAMPROW spare_row;
boolean spare_full; /* T if spare buffer is occupied */
JDIMENSION out_row_width; /* samples per output row */
JDIMENSION rows_to_go; /* counts rows remaining in image */
} my_upsampler;
typedef my_upsampler *my_upsample_ptr;
#define SCALEBITS 16 /* speediest right-shift on some machines */
#define ONE_HALF ((JLONG)1 << (SCALEBITS - 1))
#define FIX(x) ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
@ -189,7 +161,7 @@ typedef my_upsampler *my_upsample_ptr;
LOCAL(void)
build_ycc_rgb_table(j_decompress_ptr cinfo)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
int i;
JLONG x;
SHIFT_TEMPS
@ -232,7 +204,7 @@ build_ycc_rgb_table(j_decompress_ptr cinfo)
METHODDEF(void)
start_pass_merged_upsample(j_decompress_ptr cinfo)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
/* Mark the spare buffer empty */
upsample->spare_full = FALSE;
@ -254,7 +226,7 @@ merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
/* 2:1 vertical sampling case: may need a spare row. */
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
JSAMPROW work_ptrs[2];
JDIMENSION num_rows; /* number of rows returned to caller */
@ -305,7 +277,7 @@ merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
/* 1:1 vertical sampling case: much easier, never need a spare row. */
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
/* Just do the upsampling. */
(*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
@ -420,11 +392,10 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
* RGB565 conversion
*/
#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \
(((g) << 3) & 0x7E0) | ((b) >> 3))
#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \
(((g) << 11) & 0xE000) | \
(((b) << 5) & 0x1F00))
#define PACK_SHORT_565_LE(r, g, b) \
((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
#define PACK_SHORT_565_BE(r, g, b) \
(((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l)
#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r)
@ -566,11 +537,11 @@ h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
GLOBAL(void)
jinit_merged_upsampler(j_decompress_ptr cinfo)
{
my_upsample_ptr upsample;
my_merged_upsample_ptr upsample;
upsample = (my_upsample_ptr)
upsample = (my_merged_upsample_ptr)
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
sizeof(my_upsampler));
sizeof(my_merged_upsampler));
cinfo->upsample = (struct jpeg_upsampler *)upsample;
upsample->pub.start_pass = start_pass_merged_upsample;
upsample->pub.need_context_rows = FALSE;

@ -0,0 +1,47 @@
/*
* jdmerge.h
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1994-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*/
#define JPEG_INTERNALS
#include "jpeglib.h"
#ifdef UPSAMPLE_MERGING_SUPPORTED
/* Private subobject
*
* State for the merged upsampling/color conversion module. The object is
* allocated by jinit_merged_upsampler() and stored in cinfo->upsample, and
* code that needs the private fields casts cinfo->upsample back to
* my_merged_upsample_ptr.
*
* The declaration lives in this header rather than in jdmerge.c so that
* other decompressor sources can reach the private fields; jdapistd.c, for
* example, uses spare_row as the destination buffer when it reads and
* discards scanlines while merged upsampling with 2:1 vertical sampling is
* in use.
*/
typedef struct {
/* NOTE(review): presumably must stay the first member, since the object is
* cast to and from struct jpeg_upsampler * -- confirm before reordering.
*/
struct jpeg_upsampler pub; /* public fields */
/* Pointer to routine to do actual upsampling/conversion of one row group */
void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
/* Private state for YCC->RGB conversion */
int *Cr_r_tab; /* => table for Cr to R conversion */
int *Cb_b_tab; /* => table for Cb to B conversion */
JLONG *Cr_g_tab; /* => table for Cr to G conversion */
JLONG *Cb_g_tab; /* => table for Cb to G conversion */
/* For 2:1 vertical sampling, we produce two output rows at a time.
* We need a "spare" row buffer to hold the second output row if the
* application provides just a one-row buffer; we also use the spare
* to discard the dummy last row if the image height is odd.
*/
JSAMPROW spare_row;
boolean spare_full; /* T if spare buffer is occupied */
JDIMENSION out_row_width; /* samples per output row */
JDIMENSION rows_to_go; /* counts rows remaining in image */
} my_merged_upsampler;
/* Pointer type used when downcasting cinfo->upsample to the merged upsampler */
typedef my_merged_upsampler *my_merged_upsample_ptr;
#endif /* UPSAMPLE_MERGING_SUPPORTED */

@ -5,7 +5,7 @@
* Copyright (C) 1994-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2013, Linaro Limited.
* Copyright (C) 2014-2015, 2018, D. R. Commander.
* Copyright (C) 2014-2015, 2018, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -19,7 +19,7 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr;
@ -90,7 +90,7 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr;
@ -163,7 +163,7 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr0, outptr1;
@ -259,7 +259,7 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr0, outptr1;

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1994-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2011, 2015, D. R. Commander.
* Copyright (C) 2011, 2015, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -25,7 +25,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr;
@ -97,7 +97,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
JDIMENSION in_row_group_ctr,
JSAMPARRAY output_buf)
{
my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
register int y, cred, cgreen, cblue;
int cb, cr;
register JSAMPROW outptr0, outptr1;

@ -3,8 +3,8 @@
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1995-1997, Thomas G. Lane.
* It was modified by The libjpeg-turbo Project to include only code relevant
* to libjpeg-turbo.
* libjpeg-turbo Modifications:
* Copyright (C) 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -16,6 +16,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jpegcomp.h"
/* Forward declarations */

@ -4,11 +4,11 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, D. R. Commander.
* Copyright (C) 2015, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
* This file contains a slow-but-accurate integer implementation of the
* This file contains a slower but more accurate integer implementation of the
* forward DCT (Discrete Cosine Transform).
*
* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT

@ -5,11 +5,11 @@
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modification developed 2002-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, D. R. Commander.
* Copyright (C) 2015, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
* This file contains a slow-but-accurate integer implementation of the
* This file contains a slower but more accurate integer implementation of the
* inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
* must also perform dequantization of the input coefficients.
*

@ -5,7 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 1997-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2009, 2011, 2014-2015, 2018, D. R. Commander.
* Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -273,9 +273,9 @@ typedef int boolean;
/* Capability options common to encoder and decoder: */
#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */
#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */
#define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */
#define DCT_ISLOW_SUPPORTED /* accurate integer method */
#define DCT_IFAST_SUPPORTED /* less accurate int method [legacy feature] */
#define DCT_FLOAT_SUPPORTED /* floating-point method [legacy feature] */
/* Encoder capability options: */

@ -1,7 +1,7 @@
/*
* jpegcomp.h
*
* Copyright (C) 2010, D. R. Commander.
* Copyright (C) 2010, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -19,6 +19,7 @@
#define _min_DCT_v_scaled_size min_DCT_v_scaled_size
#define _jpeg_width jpeg_width
#define _jpeg_height jpeg_height
#define JERR_ARITH_NOTIMPL JERR_NOT_COMPILED
#else
#define _DCT_scaled_size DCT_scaled_size
#define _DCT_h_scaled_size DCT_scaled_size

@ -5,7 +5,7 @@
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modified 2002-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2013-2014, 2016-2017, D. R. Commander.
* Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, D. R. Commander.
* Copyright (C) 2015, Google, Inc.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@ -244,9 +244,9 @@ typedef enum {
/* DCT/IDCT algorithm options. */
typedef enum {
JDCT_ISLOW, /* slow but accurate integer algorithm */
JDCT_IFAST, /* faster, less accurate integer method */
JDCT_FLOAT /* floating-point: accurate, fast on fast HW */
JDCT_ISLOW, /* accurate integer method */
JDCT_IFAST, /* less accurate integer method [legacy feature] */
JDCT_FLOAT /* floating-point method [legacy feature] */
} J_DCT_METHOD;
#ifndef JDCT_DEFAULT /* may be overridden in jconfig.h */

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009, 2014-2015, D. R. Commander.
* Copyright (C) 2009, 2014-2015, 2020, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -1145,7 +1145,7 @@ start_pass_2_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
int i;
/* Only F-S dithering or no dithering is supported. */
/* If user asks for ordered dither, give him F-S. */
/* If user asks for ordered dither, give them F-S. */
if (cinfo->dither_mode != JDITHER_NONE)
cinfo->dither_mode = JDITHER_FS;
@ -1263,7 +1263,7 @@ jinit_2pass_quantizer(j_decompress_ptr cinfo)
cquantize->sv_colormap = NULL;
/* Only F-S dithering or no dithering is supported. */
/* If user asks for ordered dither, give him F-S. */
/* If user asks for ordered dither, give them F-S. */
if (cinfo->dither_mode != JDITHER_NONE)
cinfo->dither_mode = JDITHER_FS;

@ -30,23 +30,25 @@
* NOTE: It is our convention to place the authors in the following order:
* - libjpeg-turbo authors (2009-) in descending order of the date of their
* most recent contribution to the project, then in ascending order of the
* date of their first contribution to the project
* date of their first contribution to the project, then in alphabetical
* order
* - Upstream authors in descending order of the date of the first inclusion of
* their code
*/
#define JCOPYRIGHT \
"Copyright (C) 2009-2020 D. R. Commander\n" \
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
"Copyright (C) 2015, 2020 Google, Inc.\n" \
"Copyright (C) 2019 Arm Limited\n" \
"Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
"Copyright (C) 2015 Intel Corporation\n" \
"Copyright (C) 2015 Google, Inc.\n" \
"Copyright (C) 2013-2014 Linaro Limited\n" \
"Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
"Copyright (C) 2013 Linaro Limited\n" \
"Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
"Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding"
"Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding"
#define JCOPYRIGHT_SHORT \
"Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"

@ -32,7 +32,7 @@ bool calib::parametersController::loadFromFile(const std::string &inputFileName)
if(!reader.isOpened()) {
std::cerr << "Warning: Unable to open " << inputFileName <<
" Applicatioin stated with default advanced parameters" << std::endl;
" Application started with default advanced parameters" << std::endl;
return true;
}

@ -131,7 +131,7 @@ For properties you can read and change look into the documentation of the @ref c
We want to check just how imperceptible our video converting operation went, therefore we need a
system to check frame by frame the similarity or differences. The most common algorithm used for
this is the PSNR (aka **Peak signal-to-noise ratio**). The simplest definition of this starts out
from the *mean squad error*. Let there be two images: I1 and I2; with a two dimensional size i and
from the *mean squared error*. Let there be two images: I1 and I2; with a two dimensional size i and
j, composed of c number of channels.
\f[MSE = \frac{1}{c*i*j} \sum{(I_1-I_2)^2}\f]

@ -220,6 +220,11 @@ struct VZeroUpperGuard {
# define CV_VSX 1
#endif
#ifdef __F16C__
# include <immintrin.h>
# define CV_FP16 1
#endif
#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)

@ -844,7 +844,7 @@ protected:
float16_t() : w(0) {}
explicit float16_t(float x)
{
#if CV_AVX2
#if CV_FP16
__m128 v = _mm_load_ss(&x);
w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
#else
@ -875,7 +875,7 @@ protected:
operator float() const
{
#if CV_AVX2
#if CV_FP16
float f;
_mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
return f;

@ -3121,18 +3121,39 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, un
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)
//
// FP16
//
// Reads eight float16_t values starting at ptr and returns them widened to
// single precision as a v_float32x8.
inline v_float32x8 v256_load_expand(const float16_t* ptr)
{
#if CV_FP16
    // Hardware conversion: one unaligned 128-bit load feeds _mm256_cvtph_ps.
    const __m128i packedHalves = _mm_loadu_si128((const __m128i*)ptr);
    const __m256 widened = _mm256_cvtph_ps(packedHalves);
    return v_float32x8(widened);
#else
    // Fallback: convert each element through float16_t's float conversion
    // into a 32-byte aligned scratch buffer, then load that buffer.
    float CV_DECL_ALIGNED(32) scratch[8];
    int lane = 0;
    while (lane < 8)
    {
        scratch[lane] = (float)ptr[lane];
        ++lane;
    }
    return v256_load_aligned(scratch);
#endif
}
// Converts the eight floats held in `a` to float16_t and writes them to ptr.
inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
{
#if CV_FP16
    // Hardware conversion (_mm256_cvtps_ph with immediate 0), stored unaligned.
    _mm_storeu_si128((__m128i*)ptr, _mm256_cvtps_ph(a.val, 0));
#else
    // Fallback: spill the vector to a 32-byte aligned scratch buffer and
    // construct a float16_t from each element.
    float CV_DECL_ALIGNED(32) scratch[8];
    v_store_aligned(scratch, a);
    for (int lane = 0; lane != 8; ++lane)
        ptr[lane] = float16_t(scratch[lane]);
#endif
}
//
// end of FP16
//
// Cleanup hook for the 256-bit intrinsics layer: issues _mm256_zeroall().
inline void v256_cleanup() { _mm256_zeroall(); }
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

@ -5,6 +5,11 @@
#include "precomp.hpp"
#include "convert.hpp"
#if !defined(OPENCV_SUPRESS_WARNING_AVX2_WITHOUT_FP16C) && \
(defined(__GNUC__) && defined(__AVX2__) && !defined(__F16C__))
#warning "Non-optimal compiler flags: AVX2 without FP16. Generated code is very slow. Consider adding '-mf16c' compiler option."
#endif
namespace cv {
namespace hal {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN

@ -126,9 +126,11 @@ DEFINE_SIMD_TESTS(256, AVX512_SKX)
TEST(hal_intrin256, float16x16_FP16)
{
#if CV_TRY_FP16
//CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
CV_CPU_CALL_AVX2_(test_hal_intrin_float16, ());
throw SkipTestException("Unsupported hardware: FP16 is not available");
#endif
throw SkipTestException("Unsupported: FP16 is not available");
}
@ -142,8 +144,10 @@ namespace intrin512 {
TEST(hal_intrin512, float16x32_FP16)
{
#if CV_TRY_FP16
CV_CPU_CALL_AVX512_SKX_(test_hal_intrin_float16, ());
throw SkipTestException("Unsupported hardware: FP16 is not available");
#endif
throw SkipTestException("Unsupported: FP16 is not available");
}

@ -1902,21 +1902,21 @@ void test_hal_intrin_float64()
#endif
}
#if CV_FP16
void test_hal_intrin_float16()
{
DUMP_ENTRY(v_float16);
#if CV_FP16
TheTest<v_float32>()
.test_loadstore_fp16_f32()
#endif
#if CV_SIMD_FP16
.test_loadstore_fp16()
.test_float_cvt_fp16()
#endif
;
}
#else
std::cout << "SKIP: CV_FP16 is not available" << std::endl;
#endif
}
/*#if defined(CV_CPU_DISPATCH_MODE_FP16) && CV_CPU_DISPATCH_MODE == FP16
void test_hal_intrin_float16()

@ -365,9 +365,12 @@ CV__DNN_INLINE_NS_BEGIN
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const;
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const {CV_UNUSED(inputs); CV_UNUSED(outputs); return 0;}
virtual bool updateMemoryShapes(const std::vector<MatShape> &inputs);
CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
CV_PROP int preferableTarget; //!< prefer target for layer forwarding

@ -1182,6 +1182,7 @@ struct Net::Impl : public detail::NetImplBase
preferableBackend = DNN_BACKEND_DEFAULT;
preferableTarget = DNN_TARGET_CPU;
skipInfEngineInit = false;
hasDynamicShapes = false;
}
Ptr<DataLayer> netInputLayer;
@ -1193,6 +1194,7 @@ struct Net::Impl : public detail::NetImplBase
int preferableTarget;
String halideConfigFile;
bool skipInfEngineInit;
bool hasDynamicShapes;
// Map host data to backend specific wrapper.
std::map<void*, Ptr<BackendWrapper> > backendWrappers;
@ -3539,6 +3541,46 @@ struct Net::Impl : public detail::NetImplBase
shapes = inOutShapes[layerId];
}
// Re-derives the input shapes of every layer from the blobs currently held by
// layer 0 and hands them to each layer through Layer::updateMemoryShapes().
// Layer 0 is presumably the network input layer -- TODO confirm.
void updateLayersShapes()
{
CV_Assert(!layers[0].outputBlobs.empty());
ShapesVec inputShapes;
// Collect the shape of every blob of layer 0; these seed the propagation below.
for(int i = 0; i < layers[0].outputBlobs.size(); i++)
{
Mat& inp = layers[0].outputBlobs[i];
CV_Assert(inp.total());
if (preferableBackend == DNN_BACKEND_OPENCV &&
preferableTarget == DNN_TARGET_OPENCL_FP16)
{
// Re-create the blob with the same dims/size but CV_16S element type.
layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
}
inputShapes.push_back(shape(inp));
}
LayersShapesMap layersShapes;
// Layer 0 gets a non-empty `in`, so the loop below skips it.
layersShapes[0].in = inputShapes;
for (MapIdToLayerData::iterator it = layers.begin();
it != layers.end(); it++)
{
int layerId = it->first;
std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
// operator[] inserts an empty entry on first access; an empty `in` means
// this layer has not been processed yet.
if (layersShapes[layerId].in.empty())
{
for(int i = 0; i < inputLayerIds.size(); i++)
{
int inputLayerId = inputLayerIds[i].lid;
LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
if(inputIt == layersShapes.end() || inputIt->second.out.empty())
{
// Producer has no output shapes yet; presumably this call fills
// layersShapes[inputLayerId].out -- verify against the helper.
getLayerShapesRecursively(inputLayerId, layersShapes);
}
// Pick the producer's output shape at the pin's output index (oid).
const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];
layersShapes[layerId].in.push_back(shape);
}
// NOTE(review): layerInstance is dereferenced without a null check --
// confirm it is always instantiated before this function can run.
it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in);
}
}
}
LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
{
return *std::max_element(pins.begin(), pins.end());
@ -3952,6 +3994,8 @@ int Net::addLayer(const String &name, const String &type, LayerParams &params)
int id = ++impl->lastLayerId;
impl->layerNameToId.insert(std::make_pair(name, id));
impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params)));
if (params.get<bool>("has_dynamic_shapes", false))
impl->hasDynamicShapes = true;
return id;
}
@ -4283,8 +4327,13 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
bool oldShape = prevShape == blobShape;
blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
if (!oldShape)
if (!oldShape) {
ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
if (impl->hasDynamicShapes)
{
impl->updateLayersShapes();
}
}
if (!ld.outputBlobsWrappers[pin.oid].empty())
{
@ -5234,6 +5283,10 @@ bool Layer::getMemoryShapes(const std::vector<MatShape> &inputs,
return false;
}
// Default implementation: does nothing with `inputs` and returns true.
// Layers that need to react to new input shapes override this method.
bool Layer::updateMemoryShapes(const std::vector<MatShape> &inputs)
{
return true;
}
//////////////////////////////////////////////////////////////////////////
static Mutex& getLayerFactoryMutex()

@ -98,6 +98,9 @@ public:
stride = Size(1, 1);
pad_t = pad_l = pad_b = pad_r = 0;
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
shapesInitialized = !hasDynamicShapes;
if (params.has("pool") || params.has("kernel_size") ||
params.has("kernel_w") || params.has("kernel_h"))
{
@ -1191,26 +1194,34 @@ public:
outShape.push_back(pooledSize.height);
outShape.push_back(pooledSize.width);
}
else if (padMode.empty())
else
{
for (int i = 0; i < local_kernel.size(); i++) {
float dst = (float)(inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
if (hasDynamicShapes && !shapesInitialized)
{
//Just copy input shapes for width and height to prevent errors on loading stage
for (int i = 0; i < inpShape.size(); i++)
outShape.push_back(inpShape[i]);
}
else if (padMode.empty())
{
for (int i = 0; i < local_kernel.size(); i++) {
float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
}
// If we have padding, ensure that the last pooling starts strictly
// inside the image (instead of at the padding); otherwise clip the last.
for (int i = 0; i < pads_end.size(); i++) {
if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
--outShape[2 + i];
CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
// If we have padding, ensure that the last pooling starts strictly
// inside the image (instead of at the padding); otherwise clip the last.
for (int i = 0; i < pads_end.size(); i++) {
if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) {
--outShape[2 + i];
CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]);
}
}
} else {
getConvPoolOutParams(inpShape, local_kernel, strides, padMode,
std::vector<size_t>(local_kernel.size(), 1), outShape);
}
}
else
{
getConvPoolOutParams(inpShape, local_kernel, strides, padMode, std::vector<size_t>(local_kernel.size(), 1), outShape);
}
if (type == ROI)
{
CV_Assert(inputs.size() == 2);
@ -1231,6 +1242,14 @@ public:
return false;
}
// Validates the real input shape and marks this pooling layer's shapes as
// initialized. The shapesInitialized flag gates the placeholder
// "copy input shape" branch that is taken while hasDynamicShapes is set and
// the flag is still false.
//
// inputs: shapes of the layer inputs; only inputs[0] is inspected, and its
//         last two dimensions must be positive.
// Returns true; raises through CV_Assert when the shape is unusable.
bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
{
    // Guard the indexing below: there must be at least one input with at
    // least two dimensions, otherwise dims - 1 / dims - 2 are out of range.
    CV_Assert(!inputs.empty());
    const int dims = (int)inputs[0].size();
    CV_Assert(dims >= 2);
    CV_Assert(inputs[0][dims - 1] > 0 && inputs[0][dims - 2] > 0);
    shapesInitialized = true;
    return true;
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const CV_OVERRIDE
{
@ -1262,6 +1281,8 @@ private:
ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf
PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf
};
bool hasDynamicShapes;
bool shapesInitialized;
};
Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)

@ -170,6 +170,9 @@ public:
setParamsFrom(params);
int axis = params.get<int>("axis", 0);
int numAxes = params.get<int>("num_axes", -1);
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
shapesInitialized = !hasDynamicShapes;
CV_Assert(numAxes >= -1);
newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);
@ -182,6 +185,25 @@ public:
for (i = 0; i < dims; i++)
newShapeDesc[i] = paramShape.get<int>(i);
}
if (hasDynamicShapes)
{
dynamicShapes.clear();
inputIndices.clear();
if (params.has("dynamic_axes")) {
CV_Assert(params.has("input_indices"));
const DictValue &dynamicAxes = params.get("dynamic_axes");
const DictValue &dynamicInputShapes = params.get("input_indices");
int i, dims = dynamicAxes.size();
CV_Assert(dims == dynamicInputShapes.size());
CV_Assert(dims > 0);
dynamicShapes.resize(dims);
inputIndices.resize(dims);
for (i = 0; i < dims; i++) {
dynamicShapes[i] = dynamicAxes.get<int>(i);
inputIndices[i] = dynamicInputShapes.get<int>(i);
}
}
}
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
@ -196,13 +218,21 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
if (inputs.size() == 1 || inputs.size() == requiredOutputs)
{
outputs.clear();
for (size_t i = 0; i < inputs.size(); i++)
{
outputs.push_back(MatShape());
computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
if (hasDynamicShapes && !shapesInitialized)
{
outputs.push_back(newShapeDesc);
}
else
{
outputs.push_back(MatShape());
computeShapeByReshapeMask(inputs[i], newShapeDesc, newShapeRange, outputs.back());
}
}
}
else
@ -213,6 +243,19 @@ public:
return true;
}
// Refreshes the target shape descriptor from the actual input shape and marks
// the layer's shapes as initialized.
//
// When hasDynamicShapes is set, every pair (dynamicShapes[i], inputIndices[i])
// copies dimension inputIndices[i] of inputs[0] into axis dynamicShapes[i] of
// newShapeDesc. Both index lists come from layer parameters, so they are
// range-checked before use instead of being trusted.
//
// inputs: shapes of the layer inputs; only inputs[0] is read.
// Returns true; raises through CV_Assert on inconsistent indices.
bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
{
    if (hasDynamicShapes)
    {
        CV_Assert(dynamicShapes.size() == inputIndices.size());
        // inputs[0] is only needed when there is something to copy.
        CV_Assert(dynamicShapes.empty() || !inputs.empty());
        for (size_t i = 0; i < dynamicShapes.size(); ++i)
        {
            const int outAxis = dynamicShapes[i];
            const int inpAxis = inputIndices[i];
            CV_Assert(outAxis >= 0 && outAxis < (int)newShapeDesc.size());
            CV_Assert(inpAxis >= 0 && inpAxis < (int)inputs[0].size());
            newShapeDesc[outAxis] = inputs[0][inpAxis];
        }
    }
    shapesInitialized = true;
    return true;
}
void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> outputs;
@ -310,6 +353,10 @@ public:
private:
std::vector<MatShape> outShapes;
std::vector<int> dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input
std::vector<int> inputIndices; // Which axes from input are needed to compute correct output shape
bool hasDynamicShapes;
bool shapesInitialized;
};
Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)

@ -72,6 +72,8 @@ public:
setParamsFrom(params);
axis = params.get<int>("axis", 1);
num_split = params.get<int>("num_split", 0);
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
shapesInitialized = !hasDynamicShapes;
if (params.has("slice_point"))
{
CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
@ -150,7 +152,8 @@ public:
CV_Assert(sliceRanges[i].size() <= inpShape.size());
for (int j = 0; j < sliceRanges[i].size(); ++j)
{
outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
if (shapesInitialized || inpShape[j] > 0)
outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
}
}
}
@ -165,6 +168,12 @@ public:
return false;
}
// Marks this layer's shapes as initialized; `inputs` is not inspected.
// The flag is read by the slice-range clamping code, which otherwise only
// clamps axes whose input dimension is positive.
bool updateMemoryShapes(const std::vector<MatShape> &inputs) CV_OVERRIDE
{
shapesInitialized = true;
return true;
}
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
#ifdef HAVE_OPENCL
@ -597,6 +606,8 @@ public:
protected:
// The actual non-negative values determined from @p sliceRanges depends on input size.
std::vector<std::vector<Range> > finalSliceRanges;
bool hasDynamicShapes;
bool shapesInitialized;
};
class CropLayerImpl CV_FINAL : public SliceLayerImpl

@ -260,6 +260,40 @@ public:
addNodeToMatch("Cast", gather);
setFusedNode("Gather", input, index);
}
// Matches the pattern via Subgraph::match() and then vetoes the fusion when
// the tensor consumed by the trailing Cast node is also consumed by any other
// node in the graph: fusing would remove a tensor that is still needed.
//
// net:             graph being simplified.
// nodeId:          node at which matching is attempted.
// matchedNodesIds: filled by Subgraph::match() with the matched node ids.
// targetNodesIds:  forwarded unchanged to Subgraph::match().
// Returns the base-class result, or false when the Cast input is shared.
virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
                   std::vector<int>& matchedNodesIds,
                   std::vector<int>& targetNodesIds) CV_OVERRIDE
{
    bool retVal = Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds);
    size_t matchedNodesNum = matchedNodesIds.size();
    // Now we check if merging can be made for these Gather and Cast nodes
    if (!retVal || matchedNodesNum < 2)
        return retVal;

    // The extra check only applies when the last two matched nodes are
    // Gather followed by Cast.
    int nodeToMatch = matchedNodesIds[matchedNodesNum - 1];
    const Ptr<ImportNodeWrapper> node = net->getNode(nodeToMatch);
    if (node->getType() != "Cast")
        return retVal;

    int inpNodeId = matchedNodesIds[matchedNodesNum - 2];
    const Ptr<ImportNodeWrapper> inpNode = net->getNode(inpNodeId);
    if (inpNode->getType() != "Gather")
        return retVal;

    const std::string inpNodeName = node->getInputName(0);
    const int numNodes = net->getNumNodes();
    for (int i = 0; i < numNodes; ++i)
    {
        if (i == nodeToMatch)
            continue;  // the Cast node itself is the expected consumer
        const Ptr<ImportNodeWrapper> node_to_check = net->getNode(i);
        const int numInp = node_to_check->getNumInputs();
        for (int inp = 0; inp < numInp; ++inp)
        {
            // Compare every input of the candidate, not just input 0, so that
            // consumers taking the tensor as a later input are detected too.
            if (inpNodeName == node_to_check->getInputName(inp))
            {
                // Another node has the same input node, so it cannot be merged.
                return false;
            }
        }
    }
    return retVal;
}
};
class ExpandSubgraph : public Subgraph
@ -513,6 +547,19 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
CV_Assert(!field.empty());
Mat(sizes, CV_64FC1, (void*)field.data()).convertTo(blob, CV_32FC1);
}
else if (datatype == opencv_onnx::TensorProto_DataType_INT32)
{
if (!tensor_proto.int32_data().empty())
{
const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
Mat(sizes, CV_32SC1, (void*)field.data()).copyTo(blob);
}
else
{
char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
Mat(sizes, CV_32SC1, val).copyTo(blob);
}
}
else if (datatype == opencv_onnx::TensorProto_DataType_INT64)
{
blob.create(sizes, CV_32SC1);

@ -64,6 +64,7 @@ public:
ONNXImporter(Net& net, const char *onnxFile)
: dstNet(net)
{
hasDynamicShapes = false;
CV_Assert(onnxFile);
CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);
@ -84,6 +85,7 @@ public:
ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
: dstNet(net)
{
hasDynamicShapes = false;
CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
struct _Buf : public std::streambuf
@ -115,6 +117,7 @@ protected:
std::map<std::string, Mat> constBlobs;
std::map<std::string, MatShape> outShapes; // List of internal blobs shapes.
bool hasDynamicShapes; // Whether the model has inputs with dynamic shapes
typedef std::map<std::string, MatShape>::iterator IterShape_t;
std::map<std::string, LayerInfo> layer_id;
@ -413,8 +416,10 @@ void ONNXImporter::populateNet()
for (int j = 0; j < inpShape.size(); ++j)
{
inpShape[j] = tensorShape.dim(j).dim_value();
if (!tensorShape.dim(j).dim_param().empty())
hasDynamicShapes = true;
}
if (!inpShape.empty())
if (!inpShape.empty() && !hasDynamicShapes)
{
inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
}
@ -461,6 +466,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
layerParams.name = name;
layerParams.type = layer_type;
layerParams.set("has_dynamic_shapes", hasDynamicShapes);
if (layer_type == "MaxPool")
{
@ -551,11 +557,36 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
CV_Assert(axes.size() <= inpShape.size() - 2);
std::vector<int> kernel_size(inpShape.size() - 2, 1);
for (int i = 0; i < axes.size(); i++) {
int axis = clamp(axes.get<int>(i), inpShape.size());
CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
kernel_size[axis - 2] = inpShape[axis];
if (axes.size() == 1 && (clamp(axes.get<int>(0), inpShape.size()) <= 1))
{
int axis = clamp(axes.get<int>(0), inpShape.size());
MatShape newShape = inpShape;
newShape[axis + 1] = total(newShape, axis + 1);
newShape.resize(axis + 2);
newShape.insert(newShape.begin(), 2 - axis, 1);
LayerParams reshapeLp;
reshapeLp.type = "Reshape";
reshapeLp.name = layerParams.name + "/reshape";
CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size()));
node_proto.set_output(0, reshapeLp.name);
addLayer(reshapeLp, node_proto);
kernel_size.resize(2);
kernel_size[0] = inpShape[axis];
node_proto.set_input(0, node_proto.output(0));
}
else
{
for (int i = 0; i < axes.size(); i++) {
int axis = clamp(axes.get<int>(i), inpShape.size());
CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
kernel_size[axis - 2] = inpShape[axis];
}
}
LayerParams poolLp = layerParams;
poolLp.name = layerParams.name + "/avg";
CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
@ -1276,6 +1307,20 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
{
layerParams.type = "Reshape";
layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
if (hasDynamicShapes)
{
std::vector<int> dynamicAxes;
std::vector<int> inputIndices;
for (int index = 0; index < inpShape.size(); ++index)
{
if (!maskedAxes[index])
inputIndices.push_back(index);
}
for (int index = 0; index < outShape.size(); ++index)
dynamicAxes.push_back(index);
layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
}
}
else
layerParams.type = "Identity";
@ -1338,6 +1383,19 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
outShape.insert(outShape.begin() + axis, 1);
layerParams.type = "Reshape";
layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
if (hasDynamicShapes)
{
std::vector<int> dynamicAxes;
std::vector<int> inputIndices;
for (int index = 0; index < outShape.size(); ++index) {
if (index != axis)
dynamicAxes.push_back(index);
}
for (int index = 0; index < inpShape.size(); ++index)
inputIndices.push_back(index);
layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
}
}
else if (layer_type == "Expand")
{
@ -1625,6 +1683,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
sliceLp.set("begin", paramBegin);
sliceLp.set("end", paramEnd);
sliceLp.set("has_dynamic_shapes", hasDynamicShapes);
if (inpShape.size() > 1)
{
@ -1637,6 +1696,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
layerParams.type = "Reshape";
layerParams.set("axis", 0);
layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
if (hasDynamicShapes)
{
std::vector<int> dynamicAxes;
std::vector<int> inputIndices;
for (int index = 0; index < inpShape.size(); ++index)
dynamicAxes.push_back(index);
for (int index = 0; index < inpShape.size(); ++index)
inputIndices.push_back(index);
layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size()));
layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size()));
}
node_proto.set_input(0, sliceLp.name);
}
else
@ -1676,7 +1746,11 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
for (int i = 1; i < node_proto.input_size(); i++)
CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());
String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
String interp_mode;
if (layerParams.has("coordinate_transformation_mode"))
interp_mode = layerParams.get<String>("coordinate_transformation_mode");
else
interp_mode = layerParams.get<String>("mode");
CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");
layerParams.set("align_corners", interp_mode == "align_corners");
@ -1688,16 +1762,23 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
shapes.convertTo(shapes, CV_32S);
int height = shapes.at<int>(2);
int width = shapes.at<int>(3);
if (node_proto.input_size() == 3)
if (hasDynamicShapes)
{
IterShape_t shapeIt = outShapes.find(node_proto.input(0));
CV_Assert(shapeIt != outShapes.end());
MatShape scales = shapeIt->second;
height *= scales[2];
width *= scales[3];
layerParams.set("zoom_factor_x", width);
layerParams.set("zoom_factor_y", height);
}
else
{
if (node_proto.input_size() == 3) {
IterShape_t shapeIt = outShapes.find(node_proto.input(0));
CV_Assert(shapeIt != outShapes.end());
MatShape scales = shapeIt->second;
height *= scales[2];
width *= scales[3];
}
layerParams.set("width", width);
layerParams.set("height", height);
}
layerParams.set("width", width);
layerParams.set("height", height);
if (layerParams.get<String>("mode") == "linear") {
layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?

@ -280,9 +280,11 @@ TEST_P(Test_ONNX_layers, ReduceSum)
testONNXModels("reduce_sum");
}
TEST_P(Test_ONNX_layers, ReduceMaxGlobal)
TEST_P(Test_ONNX_layers, ReduceMax)
{
testONNXModels("reduce_max");
testONNXModels("reduce_max_axis_0");
testONNXModels("reduce_max_axis_1");
}
TEST_P(Test_ONNX_layers, Scale)
@ -718,6 +720,10 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight)
TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
String basename = "conv1d_variable_wb";
Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
ASSERT_FALSE(net.empty());
@ -738,6 +744,37 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
normAssert(ref, out, "", default_l1, default_lInf);
}
// Runs the "gather_multi_output" ONNX model. When INF_ENGINE_RELEASE is
// defined, the MYRIAD target is tagged with the IE skip tags first.
TEST_P(Test_ONNX_layers, GatherMultiOutput)
{
#if defined(INF_ENGINE_RELEASE)
if (target == DNN_TARGET_MYRIAD)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE);
#endif
testONNXModels("gather_multi_output");
}
// Runs the ONNX models that were exported with dynamic axes. On the MYRIAD
// target each Inference Engine backend flavour is tagged with its skip tags
// before any model is run.
TEST_P(Test_ONNX_layers, DynamicAxes)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);

    // Models are exercised in this exact order.
    static const char* const dynamicAxesModels[] = {
        "squeeze_and_conv_dynamic_axes",
        "unsqueeze_and_conv_dynamic_axes",
        "gather_dynamic_axes",
        "gather_scalar_dynamic_axes",
        "slice_dynamic_axes",
        "slice_opset_11_dynamic_axes",
        "resize_opset11_torch1.6_dynamic_axes",
        "average_pooling_dynamic_axes",
        "maxpooling_sigmoid_dynamic_axes"
    };
    const size_t modelCount = sizeof(dynamicAxesModels) / sizeof(dynamicAxesModels[0]);
    for (size_t idx = 0; idx < modelCount; ++idx)
        testONNXModels(dynamicAxesModels[idx]);
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
class Test_ONNX_nets : public Test_ONNX_layers

Loading…
Cancel
Save