Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/14728/head
Alexander Alekhin 6 years ago
commit f3de2b4be7
  1. 3rdparty/libjpeg-turbo/CMakeLists.txt (5 changed lines)
  2. 3rdparty/libjpeg-turbo/src/jcicc.c (105 changed lines)
  3. 3rdparty/libjpeg-turbo/src/jdicc.c (171 changed lines)
  4. 3rdparty/libjpeg/README (15 changed lines)
  5. 3rdparty/libjpeg/change.log (21 changed lines)
  6. 3rdparty/libjpeg/jcinit.c (169 changed lines)
  7. 3rdparty/libjpeg/jcmaster.c (192 changed lines)
  8. 3rdparty/libjpeg/jctrans.c (24 changed lines)
  9. 3rdparty/libjpeg/jdatadst.c (6 changed lines)
  10. 3rdparty/libjpeg/jdcolor.c (8 changed lines)
  11. 3rdparty/libjpeg/jdct.h (11 changed lines)
  12. 3rdparty/libjpeg/jdhuff.c (9 changed lines)
  13. 3rdparty/libjpeg/jdmainct.c (46 changed lines)
  14. 3rdparty/libjpeg/jdmaster.c (13 changed lines)
  15. 3rdparty/libjpeg/jdmerge.c (8 changed lines)
  16. 3rdparty/libjpeg/jfdctflt.c (4 changed lines)
  17. 3rdparty/libjpeg/jfdctfst.c (4 changed lines)
  18. 3rdparty/libjpeg/jidctflt.c (4 changed lines)
  19. 3rdparty/libjpeg/jidctfst.c (4 changed lines)
  20. 3rdparty/libjpeg/jidctint.c (35 changed lines)
  21. 3rdparty/libjpeg/jinclude.h (6 changed lines)
  22. 3rdparty/libjpeg/jpegint.h (15 changed lines)
  23. 3rdparty/libjpeg/jpeglib.h (28 changed lines)
  24. 3rdparty/libjpeg/jversion.h (6 changed lines)
  25. 3rdparty/libpng/CHANGES (39 changed lines)
  26. 3rdparty/libpng/LICENSE (8 changed lines)
  27. 3rdparty/libpng/README (4 changed lines)
  28. 3rdparty/libpng/arm/palette_neon_intrinsics.c (32 changed lines)
  29. 3rdparty/libpng/png.c (11 changed lines)
  30. 3rdparty/libpng/png.h (34 changed lines)
  31. 3rdparty/libpng/pngconf.h (6 changed lines)
  32. 3rdparty/libpng/pnglibconf.h (4 changed lines)
  33. 3rdparty/libpng/pngpriv.h (10 changed lines)
  34. 3rdparty/libpng/pngread.c (8 changed lines)
  35. 3rdparty/libpng/pngrtran.c (50 changed lines)
  36. 3rdparty/libpng/pngstruct.h (12 changed lines)
  37. 3rdparty/libpng/pngwrite.c (6 changed lines)
  38. 3rdparty/libwebp/CMakeLists.txt (5 changed lines)
  39. 3rdparty/libwebp/COPYING (30 changed lines)
  40. 3rdparty/libwebp/src/dec/alphai_dec.h (2 changed lines)
  41. 3rdparty/libwebp/src/dec/buffer_dec.c (3 changed lines)
  42. 3rdparty/libwebp/src/dec/common_dec.h (2 changed lines)
  43. 3rdparty/libwebp/src/dec/frame_dec.c (9 changed lines)
  44. 3rdparty/libwebp/src/dec/idec_dec.c (25 changed lines)
  45. 3rdparty/libwebp/src/dec/vp8_dec.h (2 changed lines)
  46. 3rdparty/libwebp/src/dec/vp8i_dec.h (4 changed lines)
  47. 3rdparty/libwebp/src/dec/vp8l_dec.c (103 changed lines)
  48. 3rdparty/libwebp/src/dec/vp8li_dec.h (2 changed lines)
  49. 3rdparty/libwebp/src/dec/webpi_dec.h (2 changed lines)
  50. 3rdparty/libwebp/src/demux/demux.c (2 changed lines)
  51. 3rdparty/libwebp/src/dsp/cost.c (6 changed lines)
  52. 3rdparty/libwebp/src/dsp/cost_neon.c (122 changed lines)
  53. 3rdparty/libwebp/src/dsp/dsp.h (6 changed lines)
  54. 3rdparty/libwebp/src/dsp/enc.c (6 changed lines)
  55. 3rdparty/libwebp/src/dsp/enc_avx2.c (21 changed lines)
  56. 3rdparty/libwebp/src/dsp/lossless.c (2 changed lines)
  57. 3rdparty/libwebp/src/dsp/lossless.h (14 changed lines)
  58. 3rdparty/libwebp/src/dsp/lossless_enc.c (86 changed lines)
  59. 3rdparty/libwebp/src/dsp/lossless_enc_mips32.c (79 changed lines)
  60. 3rdparty/libwebp/src/dsp/lossless_enc_sse2.c (44 changed lines)
  61. 3rdparty/libwebp/src/dsp/msa_macro.h (2 changed lines)
  62. 3rdparty/libwebp/src/dsp/quant.h (70 changed lines)
  63. 3rdparty/libwebp/src/dsp/rescaler.c (4 changed lines)
  64. 3rdparty/libwebp/src/dsp/rescaler_mips32.c (4 changed lines)
  65. 3rdparty/libwebp/src/dsp/rescaler_mips_dsp_r2.c (10 changed lines)
  66. 3rdparty/libwebp/src/dsp/rescaler_msa.c (7 changed lines)
  67. 3rdparty/libwebp/src/dsp/rescaler_neon.c (18 changed lines)
  68. 3rdparty/libwebp/src/dsp/rescaler_sse2.c (35 changed lines)
  69. 3rdparty/libwebp/src/dsp/yuv.h (2 changed lines)
  70. 3rdparty/libwebp/src/enc/analysis_enc.c (2 changed lines)
  71. 3rdparty/libwebp/src/enc/backward_references_cost_enc.c (2 changed lines)
  72. 3rdparty/libwebp/src/enc/backward_references_enc.c (1 changed line)
  73. 3rdparty/libwebp/src/enc/cost_enc.h (2 changed lines)
  74. 3rdparty/libwebp/src/enc/histogram_enc.c (550 changed lines)
  75. 3rdparty/libwebp/src/enc/histogram_enc.h (10 changed lines)
  76. 3rdparty/libwebp/src/enc/iterator_enc.c (2 changed lines)
  77. 3rdparty/libwebp/src/enc/picture_tools_enc.c (41 changed lines)
  78. 3rdparty/libwebp/src/enc/predictor_enc.c (14 changed lines)
  79. 3rdparty/libwebp/src/enc/quant_enc.c (14 changed lines)
  80. 3rdparty/libwebp/src/enc/vp8i_enc.h (6 changed lines)
  81. 3rdparty/libwebp/src/enc/vp8l_enc.c (18 changed lines)
  82. 3rdparty/libwebp/src/enc/vp8li_enc.h (2 changed lines)
  83. 3rdparty/libwebp/src/mux/animi.h (2 changed lines)
  84. 3rdparty/libwebp/src/mux/muxedit.c (18 changed lines)
  85. 3rdparty/libwebp/src/mux/muxi.h (18 changed lines)
  86. 3rdparty/libwebp/src/mux/muxinternal.c (53 changed lines)
  87. 3rdparty/libwebp/src/mux/muxread.c (61 changed lines)
  88. 3rdparty/libwebp/src/utils/bit_reader_inl_utils.h (2 changed lines)
  89. 3rdparty/libwebp/src/utils/bit_reader_utils.h (2 changed lines)
  90. 3rdparty/libwebp/src/utils/bit_writer_utils.c (1 changed line)
  91. 3rdparty/libwebp/src/utils/bit_writer_utils.h (2 changed lines)
  92. 3rdparty/libwebp/src/utils/filters_utils.h (2 changed lines)
  93. 3rdparty/libwebp/src/utils/quant_levels_dec_utils.c (8 changed lines)
  94. 3rdparty/libwebp/src/utils/quant_levels_dec_utils.h (2 changed lines)
  95. 3rdparty/libwebp/src/utils/quant_levels_utils.h (2 changed lines)
  96. 3rdparty/libwebp/src/utils/random_utils.h (2 changed lines)
  97. 3rdparty/libwebp/src/utils/rescaler_utils.h (2 changed lines)
  98. 3rdparty/libwebp/src/utils/thread_utils.h (2 changed lines)
  99. 3rdparty/libwebp/src/utils/utils.h (28 changed lines)
  100. 3rdparty/libwebp/src/webp/decode.h (14 changed lines)
  101. Some files were not shown because too many files have changed in this diff.

@ -78,10 +78,11 @@ configure_file(jconfigint.h.in jconfigint.h)
include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src)
set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
set(JPEG_SOURCES
jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c jcicc.c
jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c jcphuff.c
jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c jdatasrc.c
jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c
jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c jdmainct.c jdmarker.c
jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c
jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c
jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)

@ -0,0 +1,105 @@
/*
* jcicc.c
*
* Copyright (C) 1997-1998, Thomas G. Lane, Todd Newman.
* Copyright (C) 2017, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
* This file provides code to write International Color Consortium (ICC) device
* profiles embedded in JFIF JPEG image files. The ICC has defined a standard
* for including such data in JPEG "APP2" markers. The code given here does
* not know anything about the internal structure of the ICC profile data; it
* just knows how to embed the profile data in a JPEG file while writing it.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jerror.h"
/*
* Since an ICC profile can be larger than the maximum size of a JPEG marker
* (64K), we need provisions to split it into multiple markers. The format
* defined by the ICC specifies one or more APP2 markers containing the
* following data:
* Identifying string ASCII "ICC_PROFILE\0" (12 bytes)
* Marker sequence number 1 for first APP2, 2 for next, etc (1 byte)
* Number of markers Total number of APP2's used (1 byte)
* Profile data (remainder of APP2 data)
* Decoders should use the marker sequence numbers to reassemble the profile,
* rather than assuming that the APP2 markers appear in the correct sequence.
*/
#define ICC_MARKER (JPEG_APP0 + 2) /* JPEG marker code for ICC */
#define ICC_OVERHEAD_LEN 14 /* size of non-profile data in APP2 */
#define MAX_BYTES_IN_MARKER 65533 /* maximum data len of a JPEG marker */
#define MAX_DATA_BYTES_IN_MARKER (MAX_BYTES_IN_MARKER - ICC_OVERHEAD_LEN)
/*
* This routine writes the given ICC profile data into a JPEG file. It *must*
* be called AFTER calling jpeg_start_compress() and BEFORE the first call to
* jpeg_write_scanlines(). (This ordering ensures that the APP2 marker(s) will
* appear after the SOI and JFIF or Adobe markers, but before all else.)
*/
GLOBAL(void)
jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET *icc_data_ptr,
unsigned int icc_data_len)
{
unsigned int num_markers; /* total number of markers we'll write */
int cur_marker = 1; /* per spec, counting starts at 1 */
unsigned int length; /* number of bytes to write in this marker */
if (icc_data_ptr == NULL || icc_data_len == 0)
ERREXIT(cinfo, JERR_BUFFER_SIZE);
if (cinfo->global_state < CSTATE_SCANNING)
ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
/* Calculate the number of markers we'll need, rounding up of course */
num_markers = icc_data_len / MAX_DATA_BYTES_IN_MARKER;
if (num_markers * MAX_DATA_BYTES_IN_MARKER != icc_data_len)
num_markers++;
while (icc_data_len > 0) {
/* length of profile to put in this marker */
length = icc_data_len;
if (length > MAX_DATA_BYTES_IN_MARKER)
length = MAX_DATA_BYTES_IN_MARKER;
icc_data_len -= length;
/* Write the JPEG marker header (APP2 code and marker length) */
jpeg_write_m_header(cinfo, ICC_MARKER,
(unsigned int)(length + ICC_OVERHEAD_LEN));
/* Write the marker identifying string "ICC_PROFILE" (null-terminated). We
* code it in this less-than-transparent way so that the code works even if
* the local character set is not ASCII.
*/
jpeg_write_m_byte(cinfo, 0x49);
jpeg_write_m_byte(cinfo, 0x43);
jpeg_write_m_byte(cinfo, 0x43);
jpeg_write_m_byte(cinfo, 0x5F);
jpeg_write_m_byte(cinfo, 0x50);
jpeg_write_m_byte(cinfo, 0x52);
jpeg_write_m_byte(cinfo, 0x4F);
jpeg_write_m_byte(cinfo, 0x46);
jpeg_write_m_byte(cinfo, 0x49);
jpeg_write_m_byte(cinfo, 0x4C);
jpeg_write_m_byte(cinfo, 0x45);
jpeg_write_m_byte(cinfo, 0x0);
/* Add the sequencing info */
jpeg_write_m_byte(cinfo, cur_marker);
jpeg_write_m_byte(cinfo, (int)num_markers);
/* Add the profile data */
while (length--) {
jpeg_write_m_byte(cinfo, *icc_data_ptr);
icc_data_ptr++;
}
cur_marker++;
}
}
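With ICC_OVERHEAD_LEN = 14 and MAX_BYTES_IN_MARKER = 65533, each APP2 marker carries at most 65519 profile bytes, so the loop above splits a 200,000-byte profile across ceil(200000 / 65519) = 4 markers. For orientation, a minimal write-side usage sketch (not part of this diff; the wrapper name, RGB image layout, and reduced error handling are illustrative assumptions):
/* Hypothetical usage sketch: embed an ICC profile while compressing.
 * `profile` and `profile_len` are assumed to be loaded elsewhere. */
#include <stdio.h>
#include "jpeglib.h"
void write_jpeg_with_icc(FILE *outfile, JSAMPLE *rgb, int width, int height,
                         const JOCTET *profile, unsigned int profile_len)
{
  struct jpeg_compress_struct cinfo;
  struct jpeg_error_mgr jerr;
  JSAMPROW row;
  cinfo.err = jpeg_std_error(&jerr);
  jpeg_create_compress(&cinfo);
  jpeg_stdio_dest(&cinfo, outfile);
  cinfo.image_width = (JDIMENSION) width;
  cinfo.image_height = (JDIMENSION) height;
  cinfo.input_components = 3;
  cinfo.in_color_space = JCS_RGB;
  jpeg_set_defaults(&cinfo);
  jpeg_start_compress(&cinfo, TRUE);
  /* Must follow jpeg_start_compress() and precede the first
   * jpeg_write_scanlines() call, per the ordering rule above. */
  jpeg_write_icc_profile(&cinfo, profile, profile_len);
  while (cinfo.next_scanline < cinfo.image_height) {
    row = &rgb[cinfo.next_scanline * width * 3];
    jpeg_write_scanlines(&cinfo, &row, 1);
  }
  jpeg_finish_compress(&cinfo);
  jpeg_destroy_compress(&cinfo);
}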

@ -0,0 +1,171 @@
/*
* jdicc.c
*
* Copyright (C) 1997-1998, Thomas G. Lane, Todd Newman.
* Copyright (C) 2017, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
* This file provides code to read International Color Consortium (ICC) device
* profiles embedded in JFIF JPEG image files. The ICC has defined a standard
* for including such data in JPEG "APP2" markers. The code given here does
* not know anything about the internal structure of the ICC profile data; it
* just knows how to get the profile data from a JPEG file while reading it.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jerror.h"
#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare malloc() */
extern void *malloc(size_t size);
#endif
#define ICC_MARKER (JPEG_APP0 + 2) /* JPEG marker code for ICC */
#define ICC_OVERHEAD_LEN 14 /* size of non-profile data in APP2 */
/*
* Handy subroutine to test whether a saved marker is an ICC profile marker.
*/
LOCAL(boolean)
marker_is_icc(jpeg_saved_marker_ptr marker)
{
return
marker->marker == ICC_MARKER &&
marker->data_length >= ICC_OVERHEAD_LEN &&
/* verify the identifying string */
GETJOCTET(marker->data[0]) == 0x49 &&
GETJOCTET(marker->data[1]) == 0x43 &&
GETJOCTET(marker->data[2]) == 0x43 &&
GETJOCTET(marker->data[3]) == 0x5F &&
GETJOCTET(marker->data[4]) == 0x50 &&
GETJOCTET(marker->data[5]) == 0x52 &&
GETJOCTET(marker->data[6]) == 0x4F &&
GETJOCTET(marker->data[7]) == 0x46 &&
GETJOCTET(marker->data[8]) == 0x49 &&
GETJOCTET(marker->data[9]) == 0x4C &&
GETJOCTET(marker->data[10]) == 0x45 &&
GETJOCTET(marker->data[11]) == 0x0;
}
/*
* See if there was an ICC profile in the JPEG file being read; if so,
* reassemble and return the profile data.
*
* TRUE is returned if an ICC profile was found, FALSE if not. If TRUE is
* returned, *icc_data_ptr is set to point to the returned data, and
* *icc_data_len is set to its length.
*
* IMPORTANT: the data at *icc_data_ptr is allocated with malloc() and must be
* freed by the caller with free() when the caller no longer needs it.
* (Alternatively, we could write this routine to use the IJG library's memory
* allocator, so that the data would be freed implicitly when
* jpeg_finish_decompress() is called. But it seems likely that many
* applications will prefer to have the data stick around after decompression
* finishes.)
*/
GLOBAL(boolean)
jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
unsigned int *icc_data_len)
{
jpeg_saved_marker_ptr marker;
int num_markers = 0;
int seq_no;
JOCTET *icc_data;
unsigned int total_length;
#define MAX_SEQ_NO 255 /* sufficient since marker numbers are bytes */
char marker_present[MAX_SEQ_NO + 1]; /* 1 if marker found */
unsigned int data_length[MAX_SEQ_NO + 1]; /* size of profile data in marker */
unsigned int data_offset[MAX_SEQ_NO + 1]; /* offset for data in marker */
if (icc_data_ptr == NULL || icc_data_len == NULL)
ERREXIT(cinfo, JERR_BUFFER_SIZE);
if (cinfo->global_state < DSTATE_READY)
ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
*icc_data_ptr = NULL; /* avoid confusion if FALSE return */
*icc_data_len = 0;
/* This first pass over the saved markers discovers whether there are
* any ICC markers and verifies the consistency of the marker numbering.
*/
for (seq_no = 1; seq_no <= MAX_SEQ_NO; seq_no++)
marker_present[seq_no] = 0;
for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) {
if (marker_is_icc(marker)) {
if (num_markers == 0)
num_markers = GETJOCTET(marker->data[13]);
else if (num_markers != GETJOCTET(marker->data[13])) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* inconsistent num_markers fields */
return FALSE;
}
seq_no = GETJOCTET(marker->data[12]);
if (seq_no <= 0 || seq_no > num_markers) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* bogus sequence number */
return FALSE;
}
if (marker_present[seq_no]) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* duplicate sequence numbers */
return FALSE;
}
marker_present[seq_no] = 1;
data_length[seq_no] = marker->data_length - ICC_OVERHEAD_LEN;
}
}
if (num_markers == 0)
return FALSE;
/* Check for missing markers, count total space needed,
* compute offset of each marker's part of the data.
*/
total_length = 0;
for (seq_no = 1; seq_no <= num_markers; seq_no++) {
if (marker_present[seq_no] == 0) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* missing sequence number */
return FALSE;
}
data_offset[seq_no] = total_length;
total_length += data_length[seq_no];
}
if (total_length == 0) {
WARNMS(cinfo, JWRN_BOGUS_ICC); /* found only empty markers? */
return FALSE;
}
/* Allocate space for assembled data */
icc_data = (JOCTET *)malloc(total_length * sizeof(JOCTET));
if (icc_data == NULL)
ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 11); /* oops, out of memory */
/* and fill it in */
for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) {
if (marker_is_icc(marker)) {
JOCTET FAR *src_ptr;
JOCTET *dst_ptr;
unsigned int length;
seq_no = GETJOCTET(marker->data[12]);
dst_ptr = icc_data + data_offset[seq_no];
src_ptr = marker->data + ICC_OVERHEAD_LEN;
length = data_length[seq_no];
while (length--) {
*dst_ptr++ = *src_ptr++;
}
}
}
*icc_data_ptr = icc_data;
*icc_data_len = total_length;
return TRUE;
}
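Note that this routine only sees markers the application asked the library to retain: it walks cinfo->marker_list, which is populated by jpeg_save_markers(). A minimal read-side usage sketch (not part of this diff; the wrapper name and file handling are illustrative assumptions):
/* Hypothetical usage sketch: recover an embedded ICC profile while
 * reading a JPEG header. */
#include <stdio.h>
#include <stdlib.h>
#include "jpeglib.h"
JOCTET *read_icc_from_jpeg(FILE *infile, unsigned int *icc_len)
{
  struct jpeg_decompress_struct cinfo;
  struct jpeg_error_mgr jerr;
  JOCTET *icc_data = NULL;
  *icc_len = 0;
  cinfo.err = jpeg_std_error(&jerr);
  jpeg_create_decompress(&cinfo);
  jpeg_stdio_src(&cinfo, infile);
  /* Retain APP2 markers so the scan over cinfo->marker_list above has
   * something to find; this must precede jpeg_read_header(). */
  jpeg_save_markers(&cinfo, JPEG_APP0 + 2, 0xFFFF);
  jpeg_read_header(&cinfo, TRUE);
  if (!jpeg_read_icc_profile(&cinfo, &icc_data, icc_len))
    icc_data = NULL;            /* no consistent profile found */
  jpeg_destroy_decompress(&cinfo);
  return icc_data;              /* caller must free(), per the comment above */
}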

@ -1,7 +1,7 @@
The Independent JPEG Group's JPEG software
==========================================
README for release 9b of 17-Jan-2016
README for release 9c of 14-Jan-2018
====================================
This distribution contains the ninth public release of the Independent JPEG
@ -115,7 +115,7 @@ with respect to this software, its quality, accuracy, merchantability, or
fitness for a particular purpose. This software is provided "AS IS", and you,
its user, assume the entire risk as to its quality and accuracy.
This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding.
This software is copyright (C) 1991-2018, Thomas G. Lane, Guido Vollbeding.
All Rights Reserved except as specified below.
Permission is hereby granted to use, copy, modify, and distribute this
@ -246,8 +246,8 @@ ARCHIVE LOCATIONS
The "official" archive site for this software is www.ijg.org.
The most recent released version can always be found there in
directory "files". This particular version will be archived as
http://www.ijg.org/files/jpegsrc.v9b.tar.gz, and in Windows-compatible
"zip" archive format as http://www.ijg.org/files/jpegsr9b.zip.
http://www.ijg.org/files/jpegsrc.v9c.tar.gz, and in Windows-compatible
"zip" archive format as http://www.ijg.org/files/jpegsr9c.zip.
The JPEG FAQ (Frequently Asked Questions) article is a source of some
general information about JPEG.
@ -293,8 +293,11 @@ communication about JPEG configuration in Sigma Photo Pro software.
Thank to Andrew Finkenstadt for hosting the ijg.org site.
Last but not least special thank to Thomas G. Lane for the original
design and development of this singular software package.
Thank to Thomas G. Lane for the original design and development of
this singular software package.
Thank to Lars Goehler, Andreas Heinecke, Sebastian Fuss, Yvonne Roebert,
Andrej Werner, and Ulf-Dietrich Braumann for support and public relations.
FILE FORMAT WARS

@ -1,6 +1,27 @@
CHANGE LOG for Independent JPEG Group's JPEG software
Version 9c 14-Jan-2018
-----------------------
jpegtran: add an option to the -wipe switch to fill the region
with the average of adjacent blocks, instead of gray out.
Thank to Caitlyn Feddock and Maddie Ziegler for inspiration.
Make range extension bits adjustable (in jpegint.h).
Thank to Robin Watts for suggestion.
Provide macros for fflush() and ferror() in jinclude.h in order
to facilitate adaption by applications using an own FILE class.
Thank to Gerhard Huber for suggestion.
Add libjpeg pkg-config file. Thank to Mark Lavi, Vincent Torri,
Patrick McMunn, and Huw Davies for suggestion.
Add sanity checks in cjpeg image reader modules.
Thank to Bingchang, Liu for reports.
Version 9b 17-Jan-2016
-----------------------

@ -2,7 +2,7 @@
* jcinit.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2003-2013 by Guido Vollbeding.
* Modified 2003-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -21,6 +21,168 @@
#include "jpeglib.h"
/*
* Compute JPEG image dimensions and related values.
* NOTE: this is exported for possible use by application.
* Hence it mustn't do anything that can't be done twice.
*/
GLOBAL(void)
jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
/* Do computations that are needed before master selection phase */
{
/* Sanity check on input image dimensions to prevent overflow in
* following calculations.
* We do check jpeg_width and jpeg_height in initial_setup in jcmaster.c,
* but image_width and image_height can come from arbitrary data,
* and we need some space for multiplication by block_size.
*/
if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24))
ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
#ifdef DCT_SCALING_SUPPORTED
/* Compute actual JPEG image dimensions and DCT scaling choices. */
if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/1 scaling */
cinfo->jpeg_width = cinfo->image_width * cinfo->block_size;
cinfo->jpeg_height = cinfo->image_height * cinfo->block_size;
cinfo->min_DCT_h_scaled_size = 1;
cinfo->min_DCT_v_scaled_size = 1;
} else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/2 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L);
cinfo->min_DCT_h_scaled_size = 2;
cinfo->min_DCT_v_scaled_size = 2;
} else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/3 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L);
cinfo->min_DCT_h_scaled_size = 3;
cinfo->min_DCT_v_scaled_size = 3;
} else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/4 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L);
cinfo->min_DCT_h_scaled_size = 4;
cinfo->min_DCT_v_scaled_size = 4;
} else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/5 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L);
cinfo->min_DCT_h_scaled_size = 5;
cinfo->min_DCT_v_scaled_size = 5;
} else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/6 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L);
cinfo->min_DCT_h_scaled_size = 6;
cinfo->min_DCT_v_scaled_size = 6;
} else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/7 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L);
cinfo->min_DCT_h_scaled_size = 7;
cinfo->min_DCT_v_scaled_size = 7;
} else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/8 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L);
cinfo->min_DCT_h_scaled_size = 8;
cinfo->min_DCT_v_scaled_size = 8;
} else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/9 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L);
cinfo->min_DCT_h_scaled_size = 9;
cinfo->min_DCT_v_scaled_size = 9;
} else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/10 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L);
cinfo->min_DCT_h_scaled_size = 10;
cinfo->min_DCT_v_scaled_size = 10;
} else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/11 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L);
cinfo->min_DCT_h_scaled_size = 11;
cinfo->min_DCT_v_scaled_size = 11;
} else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/12 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L);
cinfo->min_DCT_h_scaled_size = 12;
cinfo->min_DCT_v_scaled_size = 12;
} else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/13 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L);
cinfo->min_DCT_h_scaled_size = 13;
cinfo->min_DCT_v_scaled_size = 13;
} else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/14 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L);
cinfo->min_DCT_h_scaled_size = 14;
cinfo->min_DCT_v_scaled_size = 14;
} else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/15 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L);
cinfo->min_DCT_h_scaled_size = 15;
cinfo->min_DCT_v_scaled_size = 15;
} else {
/* Provide block_size/16 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L);
cinfo->min_DCT_h_scaled_size = 16;
cinfo->min_DCT_v_scaled_size = 16;
}
#else /* !DCT_SCALING_SUPPORTED */
/* Hardwire it to "no scaling" */
cinfo->jpeg_width = cinfo->image_width;
cinfo->jpeg_height = cinfo->image_height;
cinfo->min_DCT_h_scaled_size = DCTSIZE;
cinfo->min_DCT_v_scaled_size = DCTSIZE;
#endif /* DCT_SCALING_SUPPORTED */
}
/*
* Master selection of compression modules.
* This is done once at the start of processing an image. We determine
@ -37,7 +199,7 @@ jinit_compress_master (j_compress_ptr cinfo)
if (cinfo->data_precision != BITS_IN_JSAMPLE)
ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
/* Sanity check on image dimensions */
/* Sanity check on input image dimensions */
if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
cinfo->input_components <= 0)
ERREXIT(cinfo, JERR_EMPTY_IMAGE);
@ -48,6 +210,9 @@ jinit_compress_master (j_compress_ptr cinfo)
if ((long) jd_samplesperrow != samplesperrow)
ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
/* Compute JPEG image dimensions and related values. */
jpeg_calc_jpeg_dimensions(cinfo);
/* Initialize master control (includes parameter checking/processing) */
jinit_c_master_control(cinfo, FALSE /* full compression */);
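The branch chain in jpeg_calc_jpeg_dimensions() amounts to picking the smallest N in 1..16 with scale_num * N >= scale_denom * block_size and then scaling by block_size/N with rounding up. A compact restatement of that selection (a standalone sketch with assumed values, not library code):
/* Closed-form equivalent of the scaling branch chain above. */
#include <stdio.h>
static long div_round_up(long a, long b) { return (a + b - 1) / b; }
int main(void)
{
  long image_width = 3000, scale_num = 1, scale_denom = 2, block_size = 8;
  int N;
  for (N = 1; N < 16; N++)
    if (scale_num * N >= scale_denom * block_size)
      break;                      /* falls through to N = 16 for scale 1/2 */
  printf("jpeg_width = %ld\n",    /* prints 1500: ceil(3000 * 8 / 16) */
         div_round_up(image_width * block_size, N));
  return 0;
}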

@ -2,7 +2,7 @@
* jcmaster.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2003-2013 by Guido Vollbeding.
* Modified 2003-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -43,191 +43,13 @@ typedef my_comp_master * my_master_ptr;
* Support routines that do various essential calculations.
*/
/*
* Compute JPEG image dimensions and related values.
* NOTE: this is exported for possible use by application.
* Hence it mustn't do anything that can't be done twice.
*/
GLOBAL(void)
jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
/* Do computations that are needed before master selection phase */
{
#ifdef DCT_SCALING_SUPPORTED
/* Sanity check on input image dimensions to prevent overflow in
* following calculation.
* We do check jpeg_width and jpeg_height in initial_setup below,
* but image_width and image_height can come from arbitrary data,
* and we need some space for multiplication by block_size.
*/
if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24))
ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
/* Compute actual JPEG image dimensions and DCT scaling choices. */
if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/1 scaling */
cinfo->jpeg_width = cinfo->image_width * cinfo->block_size;
cinfo->jpeg_height = cinfo->image_height * cinfo->block_size;
cinfo->min_DCT_h_scaled_size = 1;
cinfo->min_DCT_v_scaled_size = 1;
} else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/2 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L);
cinfo->min_DCT_h_scaled_size = 2;
cinfo->min_DCT_v_scaled_size = 2;
} else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/3 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L);
cinfo->min_DCT_h_scaled_size = 3;
cinfo->min_DCT_v_scaled_size = 3;
} else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/4 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L);
cinfo->min_DCT_h_scaled_size = 4;
cinfo->min_DCT_v_scaled_size = 4;
} else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/5 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L);
cinfo->min_DCT_h_scaled_size = 5;
cinfo->min_DCT_v_scaled_size = 5;
} else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/6 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L);
cinfo->min_DCT_h_scaled_size = 6;
cinfo->min_DCT_v_scaled_size = 6;
} else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/7 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L);
cinfo->min_DCT_h_scaled_size = 7;
cinfo->min_DCT_v_scaled_size = 7;
} else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/8 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L);
cinfo->min_DCT_h_scaled_size = 8;
cinfo->min_DCT_v_scaled_size = 8;
} else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/9 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L);
cinfo->min_DCT_h_scaled_size = 9;
cinfo->min_DCT_v_scaled_size = 9;
} else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/10 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L);
cinfo->min_DCT_h_scaled_size = 10;
cinfo->min_DCT_v_scaled_size = 10;
} else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/11 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L);
cinfo->min_DCT_h_scaled_size = 11;
cinfo->min_DCT_v_scaled_size = 11;
} else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/12 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L);
cinfo->min_DCT_h_scaled_size = 12;
cinfo->min_DCT_v_scaled_size = 12;
} else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/13 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L);
cinfo->min_DCT_h_scaled_size = 13;
cinfo->min_DCT_v_scaled_size = 13;
} else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/14 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L);
cinfo->min_DCT_h_scaled_size = 14;
cinfo->min_DCT_v_scaled_size = 14;
} else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) {
/* Provide block_size/15 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L);
cinfo->min_DCT_h_scaled_size = 15;
cinfo->min_DCT_v_scaled_size = 15;
} else {
/* Provide block_size/16 scaling */
cinfo->jpeg_width = (JDIMENSION)
jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L);
cinfo->jpeg_height = (JDIMENSION)
jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L);
cinfo->min_DCT_h_scaled_size = 16;
cinfo->min_DCT_v_scaled_size = 16;
}
#else /* !DCT_SCALING_SUPPORTED */
/* Hardwire it to "no scaling" */
cinfo->jpeg_width = cinfo->image_width;
cinfo->jpeg_height = cinfo->image_height;
cinfo->min_DCT_h_scaled_size = DCTSIZE;
cinfo->min_DCT_v_scaled_size = DCTSIZE;
#endif /* DCT_SCALING_SUPPORTED */
}
LOCAL(void)
jpeg_calc_trans_dimensions (j_compress_ptr cinfo)
{
if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size)
ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size);
cinfo->block_size = cinfo->min_DCT_h_scaled_size;
}
LOCAL(void)
initial_setup (j_compress_ptr cinfo, boolean transcode_only)
initial_setup (j_compress_ptr cinfo)
/* Do computations that are needed before master selection phase */
{
int ci, ssize;
jpeg_component_info *compptr;
if (transcode_only)
jpeg_calc_trans_dimensions(cinfo);
else
jpeg_calc_jpeg_dimensions(cinfo);
/* Sanity check on block_size */
if (cinfo->block_size < 1 || cinfo->block_size > 16)
ERREXIT2(cinfo, JERR_BAD_DCTSIZE, cinfo->block_size, cinfo->block_size);
@ -414,13 +236,9 @@ validate_script (j_compress_ptr cinfo)
* out-of-range reconstructed DC values during the first DC scan,
* which might cause problems for some decoders.
*/
#if BITS_IN_JSAMPLE == 8
#define MAX_AH_AL 10
#else
#define MAX_AH_AL 13
#endif
if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
Ah < 0 || Ah > (cinfo->data_precision > 8 ? 13 : 10) ||
Al < 0 || Al > (cinfo->data_precision > 8 ? 13 : 10))
ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
if (Ss == 0) {
if (Se != 0) /* DC and AC together not OK */
@ -812,7 +630,7 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
master->pub.is_last_pass = FALSE;
/* Validate parameters, determine derived values */
initial_setup(cinfo, transcode_only);
initial_setup(cinfo);
if (cinfo->scan_info != NULL) {
#ifdef C_MULTISCAN_FILES_SUPPORTED

@ -2,7 +2,7 @@
* jctrans.c
*
* Copyright (C) 1995-1998, Thomas G. Lane.
* Modified 2000-2013 by Guido Vollbeding.
* Modified 2000-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -85,12 +85,15 @@ jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
jpeg_set_defaults(dstinfo);
/* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
* Fix it to get the right header markers for the image colorspace.
* Note: Entropy table assignment in jpeg_set_colorspace depends
* on color_transform.
* Note: Entropy table assignment in jpeg_set_colorspace
* depends on color_transform.
* Adaption is also required for setting the appropriate
* entropy coding mode dependent on image data precision.
*/
dstinfo->color_transform = srcinfo->color_transform;
jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
dstinfo->data_precision = srcinfo->data_precision;
dstinfo->arith_code = srcinfo->data_precision > 8 ? TRUE : FALSE;
dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
/* Copy the source's quantization tables. */
for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
@ -157,6 +160,18 @@ jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
}
LOCAL(void)
jpeg_calc_trans_dimensions (j_compress_ptr cinfo)
/* Do computations that are needed before master selection phase */
{
if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size)
ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size);
cinfo->block_size = cinfo->min_DCT_h_scaled_size;
}
/*
* Master selection of compression modules for transcoding.
* This substitutes for jcinit.c's initialization of the full compressor.
@ -166,6 +181,9 @@ LOCAL(void)
transencode_master_selection (j_compress_ptr cinfo,
jvirt_barray_ptr * coef_arrays)
{
/* Do computations that are needed before master selection phase */
jpeg_calc_trans_dimensions(cinfo);
/* Initialize master control (includes parameter checking/processing) */
jinit_c_master_control(cinfo, TRUE /* transcode only */);
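For context, the path that reaches transencode_master_selection() is a jpegtran-style lossless transcode: read coefficients, copy parameters with the routine above, then write coefficients. A minimal sketch (not part of this diff; file handling is assumed):
/* Hypothetical transcode sketch: lossless DCT-domain copy. */
#include <stdio.h>
#include "jpeglib.h"
void transcode(FILE *srcfile, FILE *dstfile)
{
  struct jpeg_decompress_struct srcinfo;
  struct jpeg_compress_struct dstinfo;
  struct jpeg_error_mgr jsrcerr, jdsterr;
  jvirt_barray_ptr *coef_arrays;
  srcinfo.err = jpeg_std_error(&jsrcerr);
  jpeg_create_decompress(&srcinfo);
  dstinfo.err = jpeg_std_error(&jdsterr);
  jpeg_create_compress(&dstinfo);
  jpeg_stdio_src(&srcinfo, srcfile);
  jpeg_read_header(&srcinfo, TRUE);
  coef_arrays = jpeg_read_coefficients(&srcinfo);    /* no IDCT performed */
  jpeg_copy_critical_parameters(&srcinfo, &dstinfo); /* the routine above */
  jpeg_stdio_dest(&dstinfo, dstfile);
  jpeg_write_coefficients(&dstinfo, coef_arrays);    /* transcode-only path */
  jpeg_finish_compress(&dstinfo);
  jpeg_destroy_compress(&dstinfo);
  jpeg_finish_decompress(&srcinfo);
  jpeg_destroy_decompress(&srcinfo);
}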

@ -2,7 +2,7 @@
* jdatadst.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2009-2012 by Guido Vollbeding.
* Modified 2009-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -170,9 +170,9 @@ term_destination (j_compress_ptr cinfo)
if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
ERREXIT(cinfo, JERR_FILE_WRITE);
}
fflush(dest->outfile);
JFFLUSH(dest->outfile);
/* Make sure we wrote the output file OK */
if (ferror(dest->outfile))
if (JFERROR(dest->outfile))
ERREXIT(cinfo, JERR_FILE_WRITE);
}

@ -2,7 +2,7 @@
* jdcolor.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2011-2015 by Guido Vollbeding.
* Modified 2011-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -14,6 +14,12 @@
#include "jpeglib.h"
#if RANGE_BITS < 2
/* Deliberate syntax err */
Sorry, this code requires 2 or more range extension bits.
#endif
/* Private subobject */
typedef struct {

@ -2,7 +2,7 @@
* jdct.h
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2002-2015 by Guido Vollbeding.
* Modified 2002-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -79,13 +79,12 @@ typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
* converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
* be quite far out of range if the input data is corrupt, so a bulletproof
* range-limiting step is required. We use a mask-and-table-lookup method
* to do the combined operations quickly, assuming that MAXJSAMPLE+1
* is a power of 2. See the comments with prepare_range_limit_table
* (in jdmaster.c) for more info.
* to do the combined operations quickly, assuming that RANGE_CENTER
* (defined in jpegint.h) is a power of 2. See the comments with
* prepare_range_limit_table (in jdmaster.c) for more info.
*/
#define RANGE_MASK (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
#define RANGE_CENTER (MAXJSAMPLE * 2 + 2)
#define RANGE_MASK (RANGE_CENTER * 2 - 1)
#define RANGE_SUBSET (RANGE_CENTER - CENTERJSAMPLE)
#define IDCT_range_limit(cinfo) ((cinfo)->sample_range_limit - RANGE_SUBSET)

@ -2,7 +2,7 @@
* jdhuff.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2006-2013 by Guido Vollbeding.
* Modified 2006-2016 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -799,10 +799,6 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
*/
if (! entropy->insufficient_data) {
Se = cinfo->Se;
Al = cinfo->Al;
natural_order = cinfo->natural_order;
/* Load up working state.
* We can avoid loading/saving bitread state if in an EOB run.
*/
@ -814,6 +810,9 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
EOBRUN--; /* ...process it now (we do nothing) */
else {
BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
Se = cinfo->Se;
Al = cinfo->Al;
natural_order = cinfo->natural_order;
block = MCU_data[0];
tbl = entropy->ac_derived_tbl;

@ -2,7 +2,7 @@
* jdmainct.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2002-2012 by Guido Vollbeding.
* Modified 2002-2016 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -26,8 +26,8 @@
* trivial. Its responsibility is to provide context rows for upsampling/
* rescaling, and doing this in an efficient fashion is a bit tricky.
*
* Postprocessor input data is counted in "row groups". A row group
* is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
* Postprocessor input data is counted in "row groups". A row group is
* defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
* sample rows of each component. (We require DCT_scaled_size values to be
* chosen such that these numbers are integers. In practice DCT_scaled_size
* values will likely be powers of two, so we actually have the stronger
@ -37,8 +37,8 @@
* applying).
*
* The coefficient controller will deliver data to us one iMCU row at a time;
* each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
* exactly min_DCT_scaled_size row groups. (This amount of data corresponds
* each iMCU row contains v_samp_factor * DCT_v_scaled_size sample rows, or
* exactly min_DCT_v_scaled_size row groups. (This amount of data corresponds
* to one row of MCUs when the image is fully interleaved.) Note that the
* number of sample rows varies across components, but the number of row
* groups does not. Some garbage sample rows may be included in the last iMCU
@ -75,7 +75,7 @@
* We could do this most simply by copying data around in our buffer, but
* that'd be very slow. We can avoid copying any data by creating a rather
* strange pointer structure. Here's how it works. We allocate a workspace
* consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
* consisting of M+2 row groups (where M = min_DCT_v_scaled_size is the number
* of row groups per iMCU row). We create two sets of redundant pointers to
* the workspace. Labeling the physical row groups 0 to M+1, the synthesized
* pointer lists look like this:
@ -100,11 +100,11 @@
* the first or last sample row as necessary (this is cheaper than copying
* sample rows around).
*
* This scheme breaks down if M < 2, ie, min_DCT_scaled_size is 1. In that
* This scheme breaks down if M < 2, ie, min_DCT_v_scaled_size is 1. In that
* situation each iMCU row provides only one row group so the buffering logic
* must be different (eg, we must read two iMCU rows before we can emit the
* first row group). For now, we simply do not support providing context
* rows when min_DCT_scaled_size is 1. That combination seems unlikely to
* rows when min_DCT_v_scaled_size is 1. That combination seems unlikely to
* be worth providing --- if someone wants a 1/8th-size preview, they probably
* want it quick and dirty, so a context-free upsampler is sufficient.
*/
@ -118,17 +118,18 @@ typedef struct {
/* Pointer to allocated workspace (M or M+2 row groups). */
JSAMPARRAY buffer[MAX_COMPONENTS];
boolean buffer_full; /* Have we gotten an iMCU row from decoder? */
JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */
JDIMENSION rowgroups_avail; /* row groups available to postprocessor */
/* Remaining fields are only used in the context case. */
boolean buffer_full; /* Have we gotten an iMCU row from decoder? */
/* These are the master pointers to the funny-order pointer lists. */
JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */
int whichptr; /* indicates which pointer set is now in use */
int context_state; /* process_data state machine status */
JDIMENSION rowgroups_avail; /* row groups available to postprocessor */
JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */
} my_main_controller;
@ -195,7 +196,7 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
LOCAL(void)
make_funny_pointers (j_decompress_ptr cinfo)
/* Create the funny pointer lists discussed in the comments above.
* The actual workspace is already allocated (in main->buffer),
* The actual workspace is already allocated (in mainp->buffer),
* and the space for the pointer lists is allocated too.
* This routine just fills in the curiously ordered lists.
* This will be repeated at the beginning of each pass.
@ -317,12 +318,12 @@ start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
mainp->whichptr = 0; /* Read first iMCU row into xbuffer[0] */
mainp->context_state = CTX_PREPARE_FOR_IMCU;
mainp->iMCU_row_ctr = 0;
mainp->buffer_full = FALSE; /* Mark buffer empty */
} else {
/* Simple case with no context needed */
mainp->pub.process_data = process_data_simple_main;
mainp->rowgroup_ctr = mainp->rowgroups_avail; /* Mark buffer empty */
}
mainp->buffer_full = FALSE; /* Mark buffer empty */
mainp->rowgroup_ctr = 0;
break;
#ifdef QUANT_2PASS_SUPPORTED
case JBUF_CRANK_DEST:
@ -348,17 +349,14 @@ process_data_simple_main (j_decompress_ptr cinfo,
JDIMENSION out_rows_avail)
{
my_main_ptr mainp = (my_main_ptr) cinfo->main;
JDIMENSION rowgroups_avail;
/* Read input data if we haven't filled the main buffer yet */
if (! mainp->buffer_full) {
if (mainp->rowgroup_ctr >= mainp->rowgroups_avail) {
if (! (*cinfo->coef->decompress_data) (cinfo, mainp->buffer))
return; /* suspension forced, can do nothing more */
mainp->buffer_full = TRUE; /* OK, we have an iMCU row to work with */
mainp->rowgroup_ctr = 0; /* OK, we have an iMCU row to work with */
}
/* There are always min_DCT_scaled_size row groups in an iMCU row. */
rowgroups_avail = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
/* Note: at the bottom of the image, we may pass extra garbage row groups
* to the postprocessor. The postprocessor has to check for bottom
* of image anyway (at row resolution), so no point in us doing it too.
@ -366,14 +364,8 @@ process_data_simple_main (j_decompress_ptr cinfo,
/* Feed the postprocessor */
(*cinfo->post->post_process_data) (cinfo, mainp->buffer,
&mainp->rowgroup_ctr, rowgroups_avail,
output_buf, out_row_ctr, out_rows_avail);
/* Has postprocessor consumed all the data yet? If so, mark buffer empty */
if (mainp->rowgroup_ctr >= rowgroups_avail) {
mainp->buffer_full = FALSE;
mainp->rowgroup_ctr = 0;
}
&mainp->rowgroup_ctr, mainp->rowgroups_avail,
output_buf, out_row_ctr, out_rows_avail);
}
@ -498,7 +490,9 @@ jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
ngroups = cinfo->min_DCT_v_scaled_size + 2;
} else {
/* There are always min_DCT_v_scaled_size row groups in an iMCU row. */
ngroups = cinfo->min_DCT_v_scaled_size;
mainp->rowgroups_avail = (JDIMENSION) ngroups;
}
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
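A numeric illustration of the row-group bookkeeping above (assumed 4:2:0 sampling at full scale, not taken from this diff):
/* Row groups per the jdmainct.c comments: each component contributes
 * v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size sample rows
 * per row group, and an iMCU row is min_DCT_v_scaled_size row groups. */
#include <stdio.h>
int main(void)
{
  int min_DCT_v_scaled_size = 8;  /* full-scale decode */
  struct { const char *name; int v_samp, dct_v; } comp[] = {
    { "Y", 2, 8 }, { "Cb", 1, 8 }, { "Cr", 1, 8 },
  };
  int i, rowgroup;
  for (i = 0; i < 3; i++) {
    rowgroup = comp[i].v_samp * comp[i].dct_v / min_DCT_v_scaled_size;
    printf("%s: %d rows per row group, %d rows per iMCU row\n",
           comp[i].name, rowgroup, rowgroup * min_DCT_v_scaled_size);
  }                               /* Y: 2 and 16; Cb, Cr: 1 and 8 */
  return 0;
}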

@ -2,7 +2,7 @@
* jdmaster.c
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 2002-2015 by Guido Vollbeding.
* Modified 2002-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -237,18 +237,17 @@ prepare_range_limit_table (j_decompress_ptr cinfo)
JSAMPLE * table;
int i;
table = (JSAMPLE *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
5 * (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
table = (JSAMPLE *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
JPOOL_IMAGE, (RANGE_CENTER * 2 + MAXJSAMPLE + 1) * SIZEOF(JSAMPLE));
/* First segment of range limit table: limit[x] = 0 for x < 0 */
MEMZERO(table, 2 * (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
table += 2 * (MAXJSAMPLE+1); /* allow negative subscripts of table */
MEMZERO(table, RANGE_CENTER * SIZEOF(JSAMPLE));
table += RANGE_CENTER; /* allow negative subscripts of table */
cinfo->sample_range_limit = table;
/* Main part of range limit table: limit[x] = x */
for (i = 0; i <= MAXJSAMPLE; i++)
table[i] = (JSAMPLE) i;
/* End of range limit table: limit[x] = MAXJSAMPLE for x > MAXJSAMPLE */
for (; i < 3 * (MAXJSAMPLE+1); i++)
for (; i <= MAXJSAMPLE + RANGE_CENTER; i++)
table[i] = MAXJSAMPLE;
}
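The new allocation is numerically identical to the old one for the default configuration: with 8-bit samples and RANGE_BITS = 2, RANGE_CENTER = 128 << 2 = 512, so RANGE_CENTER * 2 + MAXJSAMPLE + 1 = 1280 entries matches the old 5 * (MAXJSAMPLE + 1), and RANGE_MASK = 512 * 2 - 1 = 1023 matches the old MAXJSAMPLE * 4 + 3. A worked check of the macros involved (a standalone sketch):
/* Range-limit table arithmetic for 8-bit samples, per jpegint.h/jdct.h. */
#include <stdio.h>
#define MAXJSAMPLE    255
#define CENTERJSAMPLE 128
#define RANGE_BITS    2
#define RANGE_CENTER  (CENTERJSAMPLE << RANGE_BITS)
#define RANGE_MASK    (RANGE_CENTER * 2 - 1)
#define RANGE_SUBSET  (RANGE_CENTER - CENTERJSAMPLE)
int main(void)
{
  /* zero segment + identity segment + saturated segment */
  printf("entries = %d\n", RANGE_CENTER * 2 + MAXJSAMPLE + 1);  /* 1280 */
  printf("RANGE_MASK = %d, RANGE_SUBSET = %d\n",                /* 1023, 384 */
         RANGE_MASK, RANGE_SUBSET);
  return 0;
}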

@ -2,7 +2,7 @@
* jdmerge.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2013-2015 by Guido Vollbeding.
* Modified 2013-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -40,6 +40,12 @@
#ifdef UPSAMPLE_MERGING_SUPPORTED
#if RANGE_BITS < 2
/* Deliberate syntax err */
Sorry, this code requires 2 or more range extension bits.
#endif
/* Private subobject */
typedef struct {

@ -2,7 +2,7 @@
* jfdctflt.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2003-2015 by Guido Vollbeding.
* Modified 2003-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -48,7 +48,7 @@
*/
#if DCTSIZE != 8
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
#endif

@ -2,7 +2,7 @@
* jfdctfst.c
*
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2003-2015 by Guido Vollbeding.
* Modified 2003-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -44,7 +44,7 @@
*/
#if DCTSIZE != 8
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
#endif

@ -2,7 +2,7 @@
* jidctflt.c
*
* Copyright (C) 1994-1998, Thomas G. Lane.
* Modified 2010-2015 by Guido Vollbeding.
* Modified 2010-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -50,7 +50,7 @@
*/
#if DCTSIZE != 8
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
#endif

@ -2,7 +2,7 @@
* jidctfst.c
*
* Copyright (C) 1994-1998, Thomas G. Lane.
* Modified 2015 by Guido Vollbeding.
* Modified 2015-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -46,7 +46,7 @@
*/
#if DCTSIZE != 8
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
#endif

@ -2,7 +2,7 @@
* jidctint.c
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modification developed 2002-2015 by Guido Vollbeding.
* Modification developed 2002-2016 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -166,6 +166,7 @@
/*
* Perform dequantization and inverse DCT on one block of coefficients.
*
* Optimized algorithm with 12 multiplications in the 1-D kernel.
* cK represents sqrt(2) * cos(K*pi/16).
*/
@ -428,7 +429,7 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
/*
* Perform dequantization and inverse DCT on one block of coefficients,
* producing a 7x7 output block.
* producing a reduced-size 7x7 output block.
*
* Optimized algorithm with 12 multiplications in the 1-D kernel.
* cK represents sqrt(2) * cos(K*pi/14).
@ -2623,7 +2624,7 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
tmp0 <<= CONST_BITS;
/* Add fudge factor here for final descale. */
tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
@ -2920,13 +2921,6 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
* The rotator is c(-6).
*/
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
z2 <<= CONST_BITS;
@ -2937,6 +2931,13 @@ jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
tmp0 = z2 + z3;
tmp1 = z2 - z3;
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
tmp10 = tmp0 + tmp2;
tmp13 = tmp0 - tmp2;
tmp11 = tmp1 + tmp3;
@ -4883,13 +4884,6 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
* The rotator is c(-6).
*/
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
z2 <<= CONST_BITS;
@ -4900,6 +4894,13 @@ jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
tmp0 = z2 + z3;
tmp1 = z2 - z3;
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
tmp10 = tmp0 + tmp2;
tmp13 = tmp0 - tmp2;
tmp11 = tmp1 + tmp3;

@ -2,6 +2,7 @@
* jinclude.h
*
* Copyright (C) 1991-1994, Thomas G. Lane.
* Modified 2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -83,9 +84,14 @@
* The modules that use fread() and fwrite() always invoke them through
* these macros. On some systems you may need to twiddle the argument casts.
* CAUTION: argument order is different from underlying functions!
*
* Furthermore, macros are provided for fflush() and ferror() in order
* to facilitate adaption by applications using an own FILE class.
*/
#define JFREAD(file,buf,sizeofbuf) \
((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
#define JFWRITE(file,buf,sizeofbuf) \
((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
#define JFFLUSH(file) fflush(file)
#define JFERROR(file) ferror(file)
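One way an application with its own FILE-like type might use these hooks (a hypothetical adaptation; memfile_t and its functions are invented for illustration, and the retargeting would live in a locally modified jinclude.h):
/* Invented buffered-output type standing in for stdio FILE. */
typedef struct {
  unsigned char *buf;
  unsigned long len, cap;
  int error;
} memfile_t;
static int memfile_flush(memfile_t *f) { (void) f; return 0; } /* no-op */
static int memfile_error(memfile_t *f) { return f->error; }
/* Retargeted macros, replacing the stdio definitions above: */
#define JFFLUSH(file) memfile_flush(file)
#define JFERROR(file) memfile_error(file)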

@ -2,7 +2,7 @@
* jpegint.h
*
* Copyright (C) 1991-1997, Thomas G. Lane.
* Modified 1997-2013 by Guido Vollbeding.
* Modified 1997-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -260,6 +260,19 @@ struct jpeg_color_quantizer {
};
/* Definition of range extension bits for decompression processes.
* See the comments with prepare_range_limit_table (in jdmaster.c)
* for more info.
* The recommended default value for normal applications is 2.
* Applications with special requirements may use a different value.
* For example, Ghostscript wants to use 3 for proper handling of
* wacky images with oversize coefficient values.
*/
#define RANGE_BITS 2
#define RANGE_CENTER (CENTERJSAMPLE << RANGE_BITS)
/* Miscellaneous useful macros */
#undef MAX

@ -2,7 +2,7 @@
* jpeglib.h
*
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modified 2002-2015 by Guido Vollbeding.
* Modified 2002-2017 by Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -39,7 +39,7 @@ extern "C" {
#define JPEG_LIB_VERSION 90 /* Compatibility version 9.0 */
#define JPEG_LIB_VERSION_MAJOR 9
#define JPEG_LIB_VERSION_MINOR 2
#define JPEG_LIB_VERSION_MINOR 3
/* Various constants determining the sizes of things.
@ -137,9 +137,9 @@ typedef struct {
/* The decompressor output side may not use these variables. */
int dc_tbl_no; /* DC entropy table selector (0..3) */
int ac_tbl_no; /* AC entropy table selector (0..3) */
/* Remaining fields should be treated as private by applications. */
/* These values are computed during compression or decompression startup: */
/* Component's size in DCT blocks.
* Any dummy blocks added to complete an MCU are not counted; therefore
@ -411,10 +411,10 @@ struct jpeg_compress_struct {
JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */
/* The coefficient controller receives data in units of MCU rows as defined
* for fully interleaved scans (whether the JPEG file is interleaved or not).
* There are v_samp_factor * DCTSIZE sample rows of each component in an
* "iMCU" (interleaved MCU) row.
* There are v_samp_factor * DCT_v_scaled_size sample rows of each component
* in an "iMCU" (interleaved MCU) row.
*/
/*
* These fields are valid during any one scan.
* They describe the components and MCUs actually appearing in the scan.
@ -422,10 +422,10 @@ struct jpeg_compress_struct {
int comps_in_scan; /* # of JPEG components in this scan */
jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
/* *cur_comp_info[i] describes component that appears i'th in SOS */
JDIMENSION MCUs_per_row; /* # of MCUs across the image */
JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */
int blocks_in_MCU; /* # of DCT blocks per MCU */
int MCU_membership[C_MAX_BLOCKS_IN_MCU];
/* MCU_membership[i] is index in cur_comp_info of component owning */
@ -636,7 +636,7 @@ struct jpeg_decompress_struct {
* in fully interleaved JPEG scans, but are used whether the scan is
* interleaved or not. We define an iMCU row as v_samp_factor DCT block
* rows of each component. Therefore, the IDCT output contains
* v_samp_factor*DCT_v_scaled_size sample rows of a component per iMCU row.
* v_samp_factor * DCT_v_scaled_size sample rows of a component per iMCU row.
*/
JSAMPLE * sample_range_limit; /* table for fast range-limiting */
@ -711,7 +711,7 @@ struct jpeg_error_mgr {
#define JMSG_LENGTH_MAX 200 /* recommended size of format_message buffer */
/* Reset error state variables at start of a new image */
JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
/* The message ID code and any parameters are saved here.
* A message can have one string parameter or up to 8 int parameters.
*/
@ -721,11 +721,11 @@ struct jpeg_error_mgr {
int i[8];
char s[JMSG_STR_PARM_MAX];
} msg_parm;
/* Standard state variables for error facility */
int trace_level; /* max msg_level that will be displayed */
/* For recoverable corrupt-data errors, we emit a warning message,
* but keep going unless emit_message chooses to abort. emit_message
* should count warnings in num_warnings. The surrounding application

@ -1,7 +1,7 @@
/*
* jversion.h
*
* Copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding.
* Copyright (C) 1991-2018, Thomas G. Lane, Guido Vollbeding.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
@ -9,6 +9,6 @@
*/
#define JVERSION "9b 17-Jan-2016"
#define JVERSION "9c 14-Jan-2018"
#define JCOPYRIGHT "Copyright (C) 2016, Thomas G. Lane, Guido Vollbeding"
#define JCOPYRIGHT "Copyright (C) 2018, Thomas G. Lane, Guido Vollbeding"

@ -6066,31 +6066,44 @@ Version 1.6.35 [July 15, 2018]
Version 1.6.36 [December 1, 2018]
Optimized png_do_expand_palette for ARM processors.
Improved performance by around 10-22% on a recent ARM Chromebook.
(Contributed by Richard Townsend, ARM Holdings)
(Contributed by Richard Townsend, ARM Holdings)
Fixed manipulation of machine-specific optimization options.
(Contributed by Vicki Pfau)
(Contributed by Vicki Pfau)
Used memcpy instead of manual pointer arithmetic on Intel SSE2.
(Contributed by Samuel Williams)
(Contributed by Samuel Williams)
Fixed build errors with MSVC on ARM64.
(Contributed by Zhijie Liang)
(Contributed by Zhijie Liang)
Fixed detection of libm in CMakeLists.
(Contributed by Cameron Cawley)
(Contributed by Cameron Cawley)
Fixed incorrect creation of pkg-config file in CMakeLists.
(Contributed by Kyle Bentley)
(Contributed by Kyle Bentley)
Fixed the CMake build on Windows MSYS by avoiding symlinks.
Fixed a build warning on OpenBSD.
(Contributed by Theo Buehler)
(Contributed by Theo Buehler)
Fixed various typos in comments.
(Contributed by "luz.paz")
(Contributed by "luz.paz")
Raised the minimum required CMake version from 3.0.2 to 3.1.
Removed yet more of the vestigial support for pre-ANSI C compilers.
Removed ancient makefiles for ancient systems that have been broken
across all previous libpng-1.6.x versions.
across all previous libpng-1.6.x versions.
Removed the Y2K compliance statement and the export control
information.
information.
Applied various code style and documentation fixes.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit
Version 1.6.37 [April 14, 2019]
Fixed a use-after-free vulnerability (CVE-2019-7317) in png_image_free.
Fixed a memory leak in the ARM NEON implementation of png_do_expand_palette.
Fixed a memory leak in pngtest.c.
Fixed two vulnerabilities (CVE-2018-14048, CVE-2018-14550) in
contrib/pngminus; refactor.
Changed the license of contrib/pngminus to MIT; refresh makefile and docs.
(Contributed by Willem van Schaik)
Fixed a typo in the libpng license v2.
(Contributed by Miguel Ojeda)
Added makefiles for AddressSanitizer-enabled builds.
Cleaned up various makefiles.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net.
Subscription is required; visit
https://lists.sourceforge.net/lists/listinfo/png-mng-implement
to subscribe).
to subscribe.

@ -4,8 +4,8 @@ COPYRIGHT NOTICE, DISCLAIMER, and LICENSE
PNG Reference Library License version 2
---------------------------------------
* Copyright (c) 1995-2018 The PNG Reference Library Authors.
* Copyright (c) 2018 Cosmin Truta.
* Copyright (c) 1995-2019 The PNG Reference Library Authors.
* Copyright (c) 2018-2019 Cosmin Truta.
* Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
* Copyright (c) 1996-1997 Andreas Dilger.
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -13,7 +13,7 @@ PNG Reference Library License version 2
The software is supplied "as is", without warranty of any kind,
express or implied, including, without limitation, the warranties
of merchantability, fitness for a particular purpose, title, and
non-infringement. In no even shall the Copyright owners, or
non-infringement. In no event shall the Copyright owners, or
anyone distributing the software, be liable for any damages or
other liability, whether in contract, tort or otherwise, arising
from, out of, or in connection with the software, or the use or
@ -39,7 +39,7 @@ subject to the following restrictions:
PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35)
-----------------------------------------------------------------------
libpng versions 1.0.7, July 1, 2000 through 1.6.35, July 15, 2018 are
libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are
Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are
derived from libpng-1.0.6, and are distributed according to the same
disclaimer and license as libpng-1.0.6 with the following individuals

@ -1,5 +1,5 @@
README for libpng version 1.6.36 - December 1, 2018
===================================================
README for libpng version 1.6.37 - April 14, 2019
=================================================
See the note about version numbers near the top of png.h.
See INSTALL for instructions on how to install libpng.

@ -1,7 +1,7 @@
/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 2017-2018 Arm Holdings. All rights reserved.
* Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
*
@ -20,9 +20,9 @@
# include <arm_neon.h>
#endif
/* Build an RGBA palette from the RGB and separate alpha palettes. */
/* Build an RGBA8 palette from the separate RGB and alpha palettes. */
void
png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
png_riffle_palette_neon(png_structrp png_ptr)
{
png_const_colorp palette = png_ptr->palette;
png_bytep riffled_palette = png_ptr->riffled_palette;
@ -30,6 +30,8 @@ png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
int num_trans = png_ptr->num_trans;
int i;
png_debug(1, "in png_riffle_palette_neon");
/* Initially black, opaque. */
uint8x16x4_t w = {{
vdupq_n_u8(0x00),
@ -38,16 +40,10 @@ png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
vdupq_n_u8(0xff),
}};
if (row_info->bit_depth != 8)
{
png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba");
return;
}
/* First, riffle the RGB colours into a RGBA palette, the A value is
* set to opaque for now.
/* First, riffle the RGB colours into an RGBA8 palette.
* The alpha component is set to opaque for now.
*/
for (i = 0; i < (1 << row_info->bit_depth); i += 16)
for (i = 0; i < 256; i += 16)
{
uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
w.val[0] = v.val[0];
@ -61,9 +57,9 @@ png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
riffled_palette[(i << 2) + 3] = trans_alpha[i];
}
/* Expands a palettized row into RGBA. */
/* Expands a palettized row into RGBA8. */
int
png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info,
png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info,
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
{
png_uint_32 row_width = row_info->width;
@ -72,6 +68,8 @@ png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info,
const png_int_32 pixels_per_chunk = 4;
int i;
png_debug(1, "in png_do_expand_palette_rgba8_neon");
if (row_width < pixels_per_chunk)
return 0;
@ -103,9 +101,9 @@ png_do_expand_palette_neon_rgba(png_structrp png_ptr, png_row_infop row_info,
return i;
}
/* Expands a palettized row into RGB format. */
/* Expands a palettized row into RGB8. */
int
png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info,
png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info,
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
{
png_uint_32 row_width = row_info->width;
@ -113,6 +111,8 @@ png_do_expand_palette_neon_rgb(png_structrp png_ptr, png_row_infop row_info,
const png_uint_32 pixels_per_chunk = 8;
int i;
png_debug(1, "in png_do_expand_palette_rgb8_neon");
if (row_width <= pixels_per_chunk)
return 0;

@ -1,7 +1,7 @@
/* png.c - location for general purpose libpng functions
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -14,7 +14,7 @@
#include "pngpriv.h"
/* Generate a compiler error if there is an old png.h in the search path. */
typedef png_libpng_version_1_6_36 Your_png_h_is_not_version_1_6_36;
typedef png_libpng_version_1_6_37 Your_png_h_is_not_version_1_6_37;
#ifdef __GNUC__
/* The version tests may need to be added to, but the problem warning has
@ -815,8 +815,8 @@ png_get_copyright(png_const_structrp png_ptr)
return PNG_STRING_COPYRIGHT
#else
return PNG_STRING_NEWLINE \
"libpng version 1.6.36" PNG_STRING_NEWLINE \
"Copyright (c) 2018 Cosmin Truta" PNG_STRING_NEWLINE \
"libpng version 1.6.37" PNG_STRING_NEWLINE \
"Copyright (c) 2018-2019 Cosmin Truta" PNG_STRING_NEWLINE \
"Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson" \
PNG_STRING_NEWLINE \
"Copyright (c) 1996-1997 Andreas Dilger" PNG_STRING_NEWLINE \
@ -4588,8 +4588,7 @@ png_image_free(png_imagep image)
if (image != NULL && image->opaque != NULL &&
image->opaque->error_buf == NULL)
{
/* Ignore errors here: */
(void)png_safe_execute(image, png_image_free_function, image);
png_image_free_function(image);
image->opaque = NULL;
}
}

@ -1,9 +1,9 @@
/* png.h - header file for PNG reference library
*
* libpng version 1.6.36 - December 1, 2018
* libpng version 1.6.37 - April 14, 2019
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -14,8 +14,9 @@
* libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
* libpng versions 0.89, June 1996, through 0.96, May 1997: Andreas Dilger
* libpng versions 0.97, January 1998, through 1.6.35, July 2018:
* Glenn Randers-Pehrson.
* libpng version 1.6.36, December 1, 2018: Cosmin Truta
* Glenn Randers-Pehrson
* libpng versions 1.6.36, December 2018, through 1.6.37, April 2019:
* Cosmin Truta
* See also "Contributing Authors", below.
*/
@ -26,8 +27,8 @@
* PNG Reference Library License version 2
* ---------------------------------------
*
* * Copyright (c) 1995-2018 The PNG Reference Library Authors.
* * Copyright (c) 2018 Cosmin Truta.
* * Copyright (c) 1995-2019 The PNG Reference Library Authors.
* * Copyright (c) 2018-2019 Cosmin Truta.
* * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
* * Copyright (c) 1996-1997 Andreas Dilger.
* * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -35,7 +36,7 @@
* The software is supplied "as is", without warranty of any kind,
* express or implied, including, without limitation, the warranties
* of merchantability, fitness for a particular purpose, title, and
* non-infringement. In no even shall the Copyright owners, or
* non-infringement. In no event shall the Copyright owners, or
* anyone distributing the software, be liable for any damages or
* other liability, whether in contract, tort or otherwise, arising
* from, out of, or in connection with the software, or the use or
@ -61,7 +62,7 @@
* PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35)
* -----------------------------------------------------------------------
*
* libpng versions 1.0.7, July 1, 2000 through 1.6.35, July 15, 2018 are
* libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are
* Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are
* derived from libpng-1.0.6, and are distributed according to the same
* disclaimer and license as libpng-1.0.6 with the following individuals
@ -238,7 +239,7 @@
* ...
* 1.5.30 15 10530 15.so.15.30[.0]
* ...
* 1.6.36 16 10636 16.so.16.36[.0]
* 1.6.37 16 10637 16.so.16.37[.0]
*
* Henceforth the source version will match the shared-library major and
* minor numbers; the shared-library major version number will be used for
@ -277,8 +278,8 @@
*/
/* Version information for png.h - this should match the version in png.c */
#define PNG_LIBPNG_VER_STRING "1.6.36"
#define PNG_HEADER_VERSION_STRING " libpng version 1.6.36 - December 1, 2018\n"
#define PNG_LIBPNG_VER_STRING "1.6.37"
#define PNG_HEADER_VERSION_STRING " libpng version 1.6.37 - April 14, 2019\n"
#define PNG_LIBPNG_VER_SONUM 16
#define PNG_LIBPNG_VER_DLLNUM 16
@ -286,12 +287,11 @@
/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
#define PNG_LIBPNG_VER_MAJOR 1
#define PNG_LIBPNG_VER_MINOR 6
#define PNG_LIBPNG_VER_RELEASE 36
#define PNG_LIBPNG_VER_RELEASE 37
/* This should match the numeric part of the final component of
* PNG_LIBPNG_VER_STRING, omitting any leading zero:
/* This should be zero for a public release, or non-zero for a
* development version. [Deprecated]
*/
#define PNG_LIBPNG_VER_BUILD 0
/* Release Status */
@ -318,7 +318,7 @@
* From version 1.0.1 it is:
* XXYYZZ, where XX=major, YY=minor, ZZ=release
*/
#define PNG_LIBPNG_VER 10636 /* 1.6.36 */
#define PNG_LIBPNG_VER 10637 /* 1.6.37 */
/* Library configuration: these options cannot be changed after
* the library has been built.
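A hypothetical sketch of unpacking that XXYYZZ encoding (for 10637 this yields 1, 6 and 37, i.e. 1.6.37):

static void
split_png_version(int ver, int *major, int *minor, int *release)
{
   *major = ver / 10000;        /* XX */
   *minor = (ver / 100) % 100;  /* YY */
   *release = ver % 100;        /* ZZ */
}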
@ -428,7 +428,7 @@ extern "C" {
/* This triggers a compiler error in png.c, if png.c and png.h
* do not agree upon the version number.
*/
typedef char* png_libpng_version_1_6_36;
typedef char* png_libpng_version_1_6_37;
/* Basic control structures. Read libpng-manual.txt or libpng.3 for more info.
*

@ -1,9 +1,9 @@
/* pngconf.h - machine configurable file for libpng
/* pngconf.h - machine-configurable file for libpng
*
* libpng version 1.6.36
* libpng version 1.6.37
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.

@ -1,8 +1,8 @@
/* pnglibconf.h - library build configuration */
/* libpng version 1.6.36 */
/* libpng version 1.6.37 */
/* Copyright (c) 2018 Cosmin Truta */
/* Copyright (c) 2018-2019 Cosmin Truta */
/* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson */
/* This code is released under the libpng license. */

@ -1,7 +1,7 @@
/* pngpriv.h - private declarations for use inside libpng
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -2119,11 +2119,11 @@ PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
#if PNG_ARM_NEON_IMPLEMENTATION == 1
PNG_INTERNAL_FUNCTION(void,
png_riffle_palette_rgba,
(png_structrp, png_row_infop),
png_riffle_palette_neon,
(png_structrp),
PNG_EMPTY);
PNG_INTERNAL_FUNCTION(int,
png_do_expand_palette_neon_rgba,
png_do_expand_palette_rgba8_neon,
(png_structrp,
png_row_infop,
png_const_bytep,
@ -2131,7 +2131,7 @@ PNG_INTERNAL_FUNCTION(int,
const png_bytepp),
PNG_EMPTY);
PNG_INTERNAL_FUNCTION(int,
png_do_expand_palette_neon_rgb,
png_do_expand_palette_rgb8_neon,
(png_structrp,
png_row_infop,
png_const_bytep,

@ -1,7 +1,7 @@
/* pngread.c - read a PNG file
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -994,6 +994,12 @@ png_read_destroy(png_structrp png_ptr)
png_ptr->chunk_list = NULL;
#endif
#if defined(PNG_READ_EXPAND_SUPPORTED) && \
defined(PNG_ARM_NEON_IMPLEMENTATION)
png_free(png_ptr, png_ptr->riffled_palette);
png_ptr->riffled_palette = NULL;
#endif
/* NOTE: the 'setjmp' buffer may still be allocated and the memory and error
* callbacks are still set at this point. They are required to complete the
* destruction of the png_struct itself.

@ -1,7 +1,7 @@
/* pngrtran.c - transforms the data in a row for PNG readers
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -1182,20 +1182,20 @@ png_init_palette_transformations(png_structrp png_ptr)
png_ptr->palette[png_ptr->background.index].blue;
#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
{
if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0)
{
/* Invert the alpha channel (in tRNS) unless the pixels are
* going to be expanded, in which case leave it for later
*/
int i, istop = png_ptr->num_trans;
for (i=0; i<istop; i++)
png_ptr->trans_alpha[i] = (png_byte)(255 -
png_ptr->trans_alpha[i]);
}
}
if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
{
if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0)
{
/* Invert the alpha channel (in tRNS) unless the pixels are
* going to be expanded, in which case leave it for later
*/
int i, istop = png_ptr->num_trans;
for (i = 0; i < istop; i++)
png_ptr->trans_alpha[i] =
(png_byte)(255 - png_ptr->trans_alpha[i]);
}
}
#endif /* READ_INVERT_ALPHA */
}
} /* background expand and (therefore) no alpha association. */
@ -4320,9 +4320,11 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
* but sometimes row_info->bit_depth has been changed to 8.
* In these cases, the palette hasn't been riffled.
*/
i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row,
i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row,
&sp, &dp);
}
#else
PNG_UNUSED(png_ptr)
#endif
for (; i < row_width; i++)
@ -4349,8 +4351,10 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
dp = row + (size_t)(row_width * 3) - 1;
i = 0;
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row,
i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row,
&sp, &dp);
#else
PNG_UNUSED(png_ptr)
#endif
for (; i < row_width; i++)
@ -4770,19 +4774,17 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8))
{
/* Allocate space for the decompressed full palette. */
if (png_ptr->riffled_palette == NULL)
{
png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
if (png_ptr->riffled_palette == NULL)
png_error(png_ptr, "NULL row buffer");
/* Build the RGBA palette. */
png_riffle_palette_rgba(png_ptr, row_info);
/* Initialize the accelerated palette expansion. */
png_ptr->riffled_palette =
(png_bytep)png_malloc(png_ptr, 256 * 4);
png_riffle_palette_neon(png_ptr);
}
}
#endif
png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
}
else

@ -1,7 +1,7 @@
/* pngstruct.h - header file for PNG reference library
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -228,10 +228,6 @@ struct png_struct_def
* big_row_buf; while writing it is separately
* allocated.
*/
#ifdef PNG_READ_EXPAND_SUPPORTED
/* Buffer to accelerate palette transformations. */
png_bytep riffled_palette;
#endif
#ifdef PNG_WRITE_FILTER_SUPPORTED
png_bytep try_row; /* buffer to save trial row when filtering */
png_bytep tst_row; /* buffer to save best trial row when filtering */
@ -396,6 +392,12 @@ struct png_struct_def
/* deleted in 1.5.5: rgb_to_gray_blue_coeff; */
#endif
/* New member added in libpng-1.6.36 */
#if defined(PNG_READ_EXPAND_SUPPORTED) && \
defined(PNG_ARM_NEON_IMPLEMENTATION)
png_bytep riffled_palette; /* buffer for accelerated palette expansion */
#endif
/* New member added in libpng-1.0.4 (renamed in 1.0.9) */
#if defined(PNG_MNG_FEATURES_SUPPORTED)
/* Changed from png_byte to png_uint_32 at version 1.2.0 */

@ -1,7 +1,7 @@
/* pngwrite.c - general routines to write a PNG file
*
* Copyright (c) 2018 Cosmin Truta
* Copyright (c) 2018-2019 Cosmin Truta
* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
* Copyright (c) 1996-1997 Andreas Dilger
* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
@ -948,10 +948,6 @@ png_write_destroy(png_structrp png_ptr)
png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
png_free(png_ptr, png_ptr->row_buf);
png_ptr->row_buf = NULL;
#ifdef PNG_READ_EXPANDED_SUPPORTED
png_free(png_ptr, png_ptr->riffled_palette);
png_ptr->riffled_palette = NULL;
#endif
#ifdef PNG_WRITE_FILTER_SUPPORTED
png_free(png_ptr, png_ptr->prev_row);
png_free(png_ptr, png_ptr->try_row);

@ -21,6 +21,11 @@ if(ANDROID AND ARMEABI_V7A AND NOT NEON)
endforeach()
endif()
# FIX for quant.h - requires C99 for() loops
ocv_check_flag_support(C "-std=c99" _varname "${CMAKE_C_FLAGS}")
if(${_varname})
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
endif()
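The flag is needed because quant.h declares loop counters inside the for statement, a C99 construct that C90 compilers reject; a minimal illustration (hypothetical helper, not project code):

/* Compiles with -std=c99 but not -std=c90: the counter is declared
 * in the for statement itself. */
static int sum_first_n(const int* v, int n) {
  int total = 0;
  for (int i = 0; i < n; ++i) total += v[i];
  return total;
}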
# ----------------------------------------------------------------------------------

@ -0,0 +1,30 @@
Copyright (c) 2010, Google Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Google nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -51,4 +51,4 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
} // extern "C"
#endif
#endif /* WEBP_DEC_ALPHAI_DEC_H_ */
#endif // WEBP_DEC_ALPHAI_DEC_H_

@ -74,7 +74,8 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
} else { // RGB checks
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
const int stride = abs(buf->stride);
const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
const uint64_t size =
MIN_BUFFER_SIZE(width * kModeBpp[mode], height, stride);
ok &= (size <= buf->size);
ok &= (stride >= width * kModeBpp[mode]);
ok &= (buf->rgba != NULL);

@ -51,4 +51,4 @@ enum { MB_FEATURE_TREE_PROBS = 3,
NUM_PROBAS = 11
};
#endif // WEBP_DEC_COMMON_DEC_H_
#endif // WEBP_DEC_COMMON_DEC_H_

@ -338,7 +338,6 @@ void VP8InitDithering(const WebPDecoderOptions* const options,
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
VP8QuantMatrix* const dqm = &dec->dqm_[s];
if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
// TODO(skal): should we specially dither more for uv_quant_ < 0?
const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
}
@ -669,15 +668,9 @@ int VP8GetThreadMethod(const WebPDecoderOptions* const options,
(void)height;
assert(headers == NULL || !headers->is_lossless);
#if defined(WEBP_USE_THREAD)
if (width < MIN_WIDTH_FOR_THREADS) return 0;
// TODO(skal): tune the heuristic further
#if 0
if (height < 2 * width) return 2;
if (width >= MIN_WIDTH_FOR_THREADS) return 2;
#endif
return 2;
#else // !WEBP_USE_THREAD
return 0;
#endif
}
#undef MT_CACHE_LINES

@ -140,10 +140,9 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
if (NeedCompressedAlpha(idec)) {
ALPHDecoder* const alph_dec = dec->alph_dec_;
dec->alpha_data_ += offset;
if (alph_dec != NULL) {
if (alph_dec != NULL && alph_dec->vp8l_dec_ != NULL) {
if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
assert(alph_vp8l_dec != NULL);
assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
dec->alpha_data_ + ALPHA_HEADER_LEN,
@ -283,10 +282,8 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
if (idec->state_ == STATE_VP8_DATA) {
VP8Io* const io = &idec->io_;
if (io->teardown != NULL) {
io->teardown(io);
}
// Synchronize the thread, clean-up and check for errors.
VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
}
idec->state_ = STATE_ERROR;
return error;
@ -451,7 +448,10 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8Io* const io = &idec->io_;
assert(dec->ready_);
// Make sure partition #0 has been read before, to set dec to ready_.
if (!dec->ready_) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
if (idec->last_mb_y_ != dec->mb_y_) {
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
@ -473,6 +473,12 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
// Synchronize the threads.
if (dec->mt_method_ > 0) {
if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
}
RestoreContext(&context, dec, token_br);
return VP8_STATUS_SUSPENDED;
}
@ -491,6 +497,7 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
}
// Synchronize the thread and check for errors.
if (!VP8ExitCritical(dec, io)) {
idec->state_ = STATE_ERROR; // prevent re-entry in IDecError
return IDecError(idec, VP8_STATUS_USER_ABORT);
}
dec->ready_ = 0;
@ -571,6 +578,10 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
status = DecodePartition0(idec);
}
if (idec->state_ == STATE_VP8_DATA) {
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
if (dec == NULL) {
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
}
status = DecodeRemaining(idec);
}
if (idec->state_ == STATE_VP8L_HEADER) {

@ -182,4 +182,4 @@ WEBP_EXTERN int VP8LGetInfo(
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8_DEC_H_ */
#endif // WEBP_DEC_VP8_DEC_H_

@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 0
#define DEC_REV_VERSION 0
#define DEC_REV_VERSION 2
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y),
@ -316,4 +316,4 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8I_DEC_H_ */
#endif // WEBP_DEC_VP8I_DEC_H_

@ -362,12 +362,19 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
VP8LMetadata* const hdr = &dec->hdr_;
uint32_t* huffman_image = NULL;
HTreeGroup* htree_groups = NULL;
// When reading htrees, some might be unused, as the format allows it.
// We will still read them but put them in this htree_group_bogus.
HTreeGroup htree_group_bogus;
HuffmanCode* huffman_tables = NULL;
HuffmanCode* huffman_tables_bogus = NULL;
HuffmanCode* next = NULL;
int num_htree_groups = 1;
int num_htree_groups_max = 1;
int max_alphabet_size = 0;
int* code_lengths = NULL;
const int table_size = kTableSize[color_cache_bits];
int* mapping = NULL;
int ok = 0;
if (allow_recursion && VP8LReadBits(br, 1)) {
// use meta Huffman codes.
@ -384,9 +391,41 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
// The huffman data is stored in red and green bytes.
const int group = (huffman_image[i] >> 8) & 0xffff;
huffman_image[i] = group;
if (group >= num_htree_groups) {
num_htree_groups = group + 1;
if (group >= num_htree_groups_max) {
num_htree_groups_max = group + 1;
}
}
// Check the validity of num_htree_groups_max. If it seems too big, use a
// smaller value for later. This prevents big memory allocations for what
// would end up being a bad bitstream anyway.
// The value of 1000 is totally arbitrary. We know that num_htree_groups_max
// is smaller than (1 << 16) and should be smaller than the number of pixels
// (though the format allows it to be bigger).
if (num_htree_groups_max > 1000 || num_htree_groups_max > xsize * ysize) {
// Create a mapping from the used indices to the minimal set of used
// values [0, num_htree_groups)
mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
if (mapping == NULL) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
goto Error;
}
// -1 means a value is unmapped, and therefore unused in the Huffman
// image.
memset(mapping, 0xff, num_htree_groups_max * sizeof(*mapping));
for (num_htree_groups = 0, i = 0; i < huffman_pixs; ++i) {
// Get the current mapping for the group and remap the Huffman image.
int* const mapped_group = &mapping[huffman_image[i]];
if (*mapped_group == -1) *mapped_group = num_htree_groups++;
huffman_image[i] = *mapped_group;
}
huffman_tables_bogus = (HuffmanCode*)WebPSafeMalloc(
table_size, sizeof(*huffman_tables_bogus));
if (huffman_tables_bogus == NULL) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
goto Error;
}
} else {
num_htree_groups = num_htree_groups_max;
}
}
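The remapping idea above can be summarized by this standalone sketch (a simplified, hypothetical helper; the real code additionally reads the coefficients of unused groups into a bogus group):

#include <stdint.h>
#include <string.h>

/* Assign consecutive small ids to the group indices that actually occur
 * in the Huffman image; indices that never occur stay mapped to -1. */
static int RemapGroupsSketch(uint32_t* image, int num_pixels,
                             int* mapping, int max_groups) {
  int next_id = 0, i;
  memset(mapping, 0xff, max_groups * sizeof(*mapping));  /* all -1 */
  for (i = 0; i < num_pixels; ++i) {
    if (mapping[image[i]] == -1) mapping[image[i]] = next_id++;
    image[i] = mapping[image[i]];  /* image values are < max_groups */
  }
  return next_id;  /* number of distinct groups actually used */
}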
@ -403,11 +442,11 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
}
}
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
sizeof(*code_lengths));
huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
sizeof(*huffman_tables));
htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
sizeof(*code_lengths));
if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
@ -415,28 +454,35 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
}
next = huffman_tables;
for (i = 0; i < num_htree_groups; ++i) {
HTreeGroup* const htree_group = &htree_groups[i];
for (i = 0; i < num_htree_groups_max; ++i) {
// If the index "i" is unused in the Huffman image, read the coefficients
// but store them to a bogus htree_group.
const int is_bogus = (mapping != NULL && mapping[i] == -1);
HTreeGroup* const htree_group =
is_bogus ? &htree_group_bogus :
&htree_groups[(mapping == NULL) ? i : mapping[i]];
HuffmanCode** const htrees = htree_group->htrees;
HuffmanCode* huffman_tables_i = is_bogus ? huffman_tables_bogus : next;
int size;
int total_size = 0;
int is_trivial_literal = 1;
int max_bits = 0;
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
int alphabet_size = kAlphabetSize[j];
htrees[j] = next;
htrees[j] = huffman_tables_i;
if (j == 0 && color_cache_bits > 0) {
alphabet_size += 1 << color_cache_bits;
}
size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
size =
ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables_i);
if (size == 0) {
goto Error;
}
if (is_trivial_literal && kLiteralMap[j] == 1) {
is_trivial_literal = (next->bits == 0);
is_trivial_literal = (huffman_tables_i->bits == 0);
}
total_size += next->bits;
next += size;
total_size += huffman_tables_i->bits;
huffman_tables_i += size;
if (j <= ALPHA) {
int local_max_bits = code_lengths[0];
int k;
@ -448,38 +494,41 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
max_bits += local_max_bits;
}
}
if (!is_bogus) next = huffman_tables_i;
htree_group->is_trivial_literal = is_trivial_literal;
htree_group->is_trivial_code = 0;
if (is_trivial_literal) {
const int red = htrees[RED][0].value;
const int blue = htrees[BLUE][0].value;
const int alpha = htrees[ALPHA][0].value;
htree_group->literal_arb =
((uint32_t)alpha << 24) | (red << 16) | blue;
htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue;
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
htree_group->is_trivial_code = 1;
htree_group->literal_arb |= htrees[GREEN][0].value << 8;
}
}
htree_group->use_packed_table = !htree_group->is_trivial_code &&
(max_bits < HUFFMAN_PACKED_BITS);
htree_group->use_packed_table =
!htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
}
WebPSafeFree(code_lengths);
ok = 1;
// All OK. Finalize pointers and return.
// All OK. Finalize pointers.
hdr->huffman_image_ = huffman_image;
hdr->num_htree_groups_ = num_htree_groups;
hdr->htree_groups_ = htree_groups;
hdr->huffman_tables_ = huffman_tables;
return 1;
Error:
WebPSafeFree(code_lengths);
WebPSafeFree(huffman_image);
WebPSafeFree(huffman_tables);
VP8LHtreeGroupsFree(htree_groups);
return 0;
WebPSafeFree(huffman_tables_bogus);
WebPSafeFree(mapping);
if (!ok) {
WebPSafeFree(huffman_image);
WebPSafeFree(huffman_tables);
VP8LHtreeGroupsFree(htree_groups);
}
return ok;
}
//------------------------------------------------------------------------------
@ -884,7 +933,11 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
#endif
break;
case 2:
#if !defined(WORDS_BIGENDIAN)
memcpy(&pattern, src, sizeof(uint16_t));
#else
pattern = ((uint32_t)src[0] << 8) | src[1];
#endif
#if defined(__arm__) || defined(_M_ARM)
pattern |= pattern << 16;
#elif defined(WEBP_USE_MIPS_DSP_R2)
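For the 2-byte case, the pattern construction amounts to the following standalone sketch (illustrative helper; for source bytes 0xAB 0xCD it produces 0xABCDABCD):

#include <stdint.h>

static uint32_t Make2BytePattern(const uint8_t* src) {
  /* Endian-independent composition, as in the WORDS_BIGENDIAN path. */
  uint32_t pattern = ((uint32_t)src[0] << 8) | src[1];  /* 0x0000ABCD */
  pattern |= pattern << 16;                             /* 0xABCDABCD */
  return pattern;
}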
@ -1523,7 +1576,6 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
if (dec == NULL) return 0;
assert(alph_dec != NULL);
alph_dec->vp8l_dec_ = dec;
dec->width_ = alph_dec->width_;
dec->height_ = alph_dec->height_;
@ -1555,11 +1607,12 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
if (!ok) goto Err;
// Only set here, once we are sure it is valid (to avoid thread races).
alph_dec->vp8l_dec_ = dec;
return 1;
Err:
VP8LDelete(alph_dec->vp8l_dec_);
alph_dec->vp8l_dec_ = NULL;
VP8LDelete(dec);
return 0;
}

@ -132,4 +132,4 @@ void VP8LDelete(VP8LDecoder* const dec);
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8LI_DEC_H_ */
#endif // WEBP_DEC_VP8LI_DEC_H_

@ -130,4 +130,4 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
} // extern "C"
#endif
#endif /* WEBP_DEC_WEBPI_DEC_H_ */
#endif // WEBP_DEC_WEBPI_DEC_H_

@ -25,7 +25,7 @@
#define DMUX_MAJ_VERSION 1
#define DMUX_MIN_VERSION 0
#define DMUX_REV_VERSION 0
#define DMUX_REV_VERSION 2
typedef struct {
size_t start_; // start location of the data

@ -377,6 +377,7 @@ VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
extern void VP8EncDspCostInitMIPS32(void);
extern void VP8EncDspCostInitMIPSdspR2(void);
extern void VP8EncDspCostInitSSE2(void);
extern void VP8EncDspCostInitNEON(void);
WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) {
VP8GetResidualCost = GetResidualCost_C;
@ -398,6 +399,11 @@ WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) {
if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspCostInitSSE2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8EncDspCostInitNEON();
}
#endif
}
}

@ -0,0 +1,122 @@
// Copyright 2018 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARM NEON version of cost functions
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_NEON)
#include "src/dsp/neon.h"
#include "src/enc/cost_enc.h"
static const uint8_t position[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16 };
static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
VP8Residual* const res) {
const int16x8_t minus_one = vdupq_n_s16(-1);
const int16x8_t coeffs_0 = vld1q_s16(coeffs);
const int16x8_t coeffs_1 = vld1q_s16(coeffs + 8);
const uint16x8_t eob_0 = vtstq_s16(coeffs_0, minus_one);
const uint16x8_t eob_1 = vtstq_s16(coeffs_1, minus_one);
const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1));
const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position));
#ifdef __aarch64__
res->last = vmaxvq_u8(masked) - 1;
#else
const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked));
const uint16x8_t eob_16x8 = vmovl_u8(eob_8x8);
const uint16x4_t eob_16x4 =
vmax_u16(vget_low_u16(eob_16x8), vget_high_u16(eob_16x8));
const uint32x4_t eob_32x4 = vmovl_u16(eob_16x4);
uint32x2_t eob_32x2 =
vmax_u32(vget_low_u32(eob_32x4), vget_high_u32(eob_32x4));
eob_32x2 = vpmax_u32(eob_32x2, eob_32x2);
vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0);
--res->last;
#endif // __aarch64__
res->coeffs = coeffs;
}
static int GetResidualCost_NEON(int ctx0, const VP8Residual* const res) {
uint8_t levels[16], ctxs[16];
uint16_t abs_levels[16];
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const int p0 = res->prob[n][ctx0][0];
CostArrayPtr const costs = res->costs;
const uint16_t* t = costs[n][ctx0];
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
// be missing during the loop.
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
if (res->last < 0) {
return VP8BitCost(0, p0);
}
{ // precompute clamped levels and contexts, packed to 8b.
const uint8x16_t kCst2 = vdupq_n_u8(2);
const uint8x16_t kCst67 = vdupq_n_u8(MAX_VARIABLE_LEVEL);
const int16x8_t c0 = vld1q_s16(res->coeffs);
const int16x8_t c1 = vld1q_s16(res->coeffs + 8);
const uint16x8_t E0 = vreinterpretq_u16_s16(vabsq_s16(c0));
const uint16x8_t E1 = vreinterpretq_u16_s16(vabsq_s16(c1));
const uint8x16_t F = vcombine_u8(vqmovn_u16(E0), vqmovn_u16(E1));
const uint8x16_t G = vminq_u8(F, kCst2); // context = 0,1,2
const uint8x16_t H = vminq_u8(F, kCst67); // clamp_level in [0..67]
vst1q_u8(ctxs, G);
vst1q_u8(levels, H);
vst1q_u16(abs_levels, E0);
vst1q_u16(abs_levels + 8, E1);
}
for (; n < res->last; ++n) {
const int ctx = ctxs[n];
const int level = levels[n];
const int flevel = abs_levels[n]; // full level
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
t = costs[n + 1][ctx];
}
// Last coefficient is always non-zero
{
const int level = levels[n];
const int flevel = abs_levels[n];
assert(flevel != 0);
cost += VP8LevelFixedCosts[flevel] + t[level];
if (n < 15) {
const int b = VP8EncBands[n + 1];
const int ctx = ctxs[n];
const int last_p0 = res->prob[b][ctx][0];
cost += VP8BitCost(0, last_p0);
}
}
return cost;
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspCostInitNEON(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitNEON(void) {
VP8SetResidualCoeffs = SetResidualCoeffs_NEON;
VP8GetResidualCost = GetResidualCost_NEON;
}
#else // !WEBP_USE_NEON
WEBP_DSP_INIT_STUB(VP8EncDspCostInitNEON)
#endif // WEBP_USE_NEON

@ -76,10 +76,6 @@ extern "C" {
#define WEBP_USE_SSE41
#endif
#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
#define WEBP_USE_AVX2
#endif
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
#if (defined(__ARM_NEON__) || \
@ -679,4 +675,4 @@ void VP8FiltersInit(void);
} // extern "C"
#endif
#endif /* WEBP_DSP_DSP_H_ */
#endif // WEBP_DSP_DSP_H_

@ -734,7 +734,6 @@ VP8BlockCopy VP8Copy16x8;
extern void VP8EncDspInitSSE2(void);
extern void VP8EncDspInitSSE41(void);
extern void VP8EncDspInitAVX2(void);
extern void VP8EncDspInitNEON(void);
extern void VP8EncDspInitMIPS32(void);
extern void VP8EncDspInitMIPSdspR2(void);
@ -784,11 +783,6 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
#endif
}
#endif
#if defined(WEBP_USE_AVX2)
if (VP8GetCPUInfo(kAVX2)) {
VP8EncDspInitAVX2();
}
#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8EncDspInitMIPS32();

@ -1,21 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// AVX2 version of speed-critical encoding functions.
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_AVX2)
#endif // WEBP_USE_AVX2
//------------------------------------------------------------------------------
// Entry point
WEBP_DSP_INIT_STUB(VP8EncDspInitAVX2)

@ -23,8 +23,6 @@
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#define MAX_DIFF_COST (1e30f)
//------------------------------------------------------------------------------
// Image transforms.

@ -163,7 +163,7 @@ extern VP8LCostCombinedFunc VP8LExtraCostCombined;
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
typedef struct { // small struct to hold counters
int counts[2]; // index: 0=zero steak, 1=non-zero streak
int counts[2]; // index: 0=zero streak, 1=non-zero streak
int streaks[2][2]; // [zero/non-zero][streak<3 / streak>=3]
} VP8LStreaks;
@ -194,10 +194,14 @@ extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
VP8LBitEntropy* const entropy);
typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out);
extern VP8LHistogramAddFunc VP8LHistogramAdd;
typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
uint32_t* out, int size);
extern VP8LAddVectorFunc VP8LAddVector;
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out);
// -----------------------------------------------------------------------------
// PrefixEncode()

@ -632,38 +632,67 @@ static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
//------------------------------------------------------------------------------
static void HistogramAdd_C(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out) {
static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
int size) {
int i;
for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
}
static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
int i;
for (i = 0; i < size; ++i) out[i] += a[i];
}
#define ADD(X, ARG, LEN) do { \
if (a->is_used_[X]) { \
if (b->is_used_[X]) { \
VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN)); \
} else { \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} \
} else if (b->is_used_[X]) { \
memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} else { \
memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0])); \
} \
} while (0)
#define ADD_EQ(X, ARG, LEN) do { \
if (a->is_used_[X]) { \
if (out->is_used_[X]) { \
VP8LAddVectorEq(a->ARG, out->ARG, (LEN)); \
} else { \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} \
} \
} while (0)
void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b, VP8LHistogram* const out) {
int i;
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_);
if (b != out) {
for (i = 0; i < literal_size; ++i) {
out->literal_[i] = a->literal_[i] + b->literal_[i];
}
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
out->distance_[i] = a->distance_[i] + b->distance_[i];
}
for (i = 0; i < NUM_LITERAL_CODES; ++i) {
out->red_[i] = a->red_[i] + b->red_[i];
out->blue_[i] = a->blue_[i] + b->blue_[i];
out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
ADD(0, literal_, literal_size);
ADD(1, red_, NUM_LITERAL_CODES);
ADD(2, blue_, NUM_LITERAL_CODES);
ADD(3, alpha_, NUM_LITERAL_CODES);
ADD(4, distance_, NUM_DISTANCE_CODES);
for (i = 0; i < 5; ++i) {
out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
}
} else {
for (i = 0; i < literal_size; ++i) {
out->literal_[i] += a->literal_[i];
}
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
out->distance_[i] += a->distance_[i];
}
for (i = 0; i < NUM_LITERAL_CODES; ++i) {
out->red_[i] += a->red_[i];
out->blue_[i] += a->blue_[i];
out->alpha_[i] += a->alpha_[i];
}
ADD_EQ(0, literal_, literal_size);
ADD_EQ(1, red_, NUM_LITERAL_CODES);
ADD_EQ(2, blue_, NUM_LITERAL_CODES);
ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
}
}
#undef ADD
#undef ADD_EQ
//------------------------------------------------------------------------------
// Image transforms.
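The semantics of the two hooks are easy to check against the C fallbacks above (illustrative values, hypothetical demo function):

static void AddVectorDemo(void) {
  uint32_t a[4]   = { 1, 2, 3, 4 };
  uint32_t b[4]   = { 10, 20, 30, 40 };
  uint32_t out[4] = { 0 };
  AddVector_C(a, b, out, 4);   /* out == {11, 22, 33, 44} */
  AddVectorEq_C(a, out, 4);    /* out == {12, 24, 36, 48} */
}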
@ -848,7 +877,8 @@ VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
VP8LHistogramAddFunc VP8LHistogramAdd;
VP8LAddVectorFunc VP8LAddVector;
VP8LAddVectorEqFunc VP8LAddVectorEq;
VP8LVectorMismatchFunc VP8LVectorMismatch;
VP8LBundleColorMapFunc VP8LBundleColorMap;
@ -885,7 +915,8 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
VP8LGetEntropyUnrefined = GetEntropyUnrefined_C;
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_C;
VP8LHistogramAdd = HistogramAdd_C;
VP8LAddVector = AddVector_C;
VP8LAddVectorEq = AddVectorEq_C;
VP8LVectorMismatch = VectorMismatch_C;
VP8LBundleColorMap = VP8LBundleColorMap_C;
@ -971,7 +1002,8 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
assert(VP8LCombinedShannonEntropy != NULL);
assert(VP8LGetEntropyUnrefined != NULL);
assert(VP8LGetCombinedEntropyUnrefined != NULL);
assert(VP8LHistogramAdd != NULL);
assert(VP8LAddVector != NULL);
assert(VP8LAddVectorEq != NULL);
assert(VP8LVectorMismatch != NULL);
assert(VP8LBundleColorMap != NULL);
assert(VP8LPredictorsSub[0] != NULL);

@ -344,65 +344,29 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
ASM_END_COMMON_0 \
ASM_END_COMMON_1
#define ADD_VECTOR(A, B, OUT, SIZE, EXTRA_SIZE) do { \
const uint32_t* pa = (const uint32_t*)(A); \
const uint32_t* pb = (const uint32_t*)(B); \
uint32_t* pout = (uint32_t*)(OUT); \
const uint32_t* const LoopEnd = pa + (SIZE); \
assert((SIZE) % 4 == 0); \
ASM_START \
ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout) \
ASM_END_0 \
if ((EXTRA_SIZE) > 0) { \
const int last = (EXTRA_SIZE); \
int i; \
for (i = 0; i < last; ++i) pout[i] = pa[i] + pb[i]; \
} \
} while (0)
#define ADD_VECTOR_EQ(A, OUT, SIZE, EXTRA_SIZE) do { \
const uint32_t* pa = (const uint32_t*)(A); \
uint32_t* pout = (uint32_t*)(OUT); \
const uint32_t* const LoopEnd = pa + (SIZE); \
assert((SIZE) % 4 == 0); \
ASM_START \
ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout) \
ASM_END_1 \
if ((EXTRA_SIZE) > 0) { \
const int last = (EXTRA_SIZE); \
int i; \
for (i = 0; i < last; ++i) pout[i] += pa[i]; \
} \
} while (0)
static void HistogramAdd_MIPS32(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out) {
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
uint32_t* pout, int size) {
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_)
- (NUM_LITERAL_CODES + NUM_LENGTH_CODES);
assert(a->palette_code_bits_ == b->palette_code_bits_);
if (b != out) {
ADD_VECTOR(a->literal_, b->literal_, out->literal_,
NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
ADD_VECTOR(a->distance_, b->distance_, out->distance_,
NUM_DISTANCE_CODES, 0);
ADD_VECTOR(a->red_, b->red_, out->red_, NUM_LITERAL_CODES, 0);
ADD_VECTOR(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES, 0);
ADD_VECTOR(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
} else {
ADD_VECTOR_EQ(a->literal_, out->literal_,
NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
ADD_VECTOR_EQ(a->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
ADD_VECTOR_EQ(a->red_, out->red_, NUM_LITERAL_CODES, 0);
ADD_VECTOR_EQ(a->blue_, out->blue_, NUM_LITERAL_CODES, 0);
ADD_VECTOR_EQ(a->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
}
const uint32_t end = ((size) / 4) * 4;
const uint32_t* const LoopEnd = pa + end;
int i;
ASM_START
ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
ASM_END_0
for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
}
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
const uint32_t end = ((size) / 4) * 4;
const uint32_t* const LoopEnd = pa + end;
int i;
ASM_START
ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
ASM_END_1
for (i = end; i < size; ++i) pout[i] += pa[i];
}
#undef ADD_VECTOR_EQ
#undef ADD_VECTOR
#undef ASM_END_1
#undef ASM_END_0
#undef ASM_END_COMMON_1
@ -422,7 +386,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
VP8LExtraCostCombined = ExtraCostCombined_MIPS32;
VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
VP8LHistogramAdd = HistogramAdd_MIPS32;
VP8LAddVector = AddVector_MIPS32;
VP8LAddVectorEq = AddVectorEq_MIPS32;
}
#else // !WEBP_USE_MIPS32

@ -170,12 +170,13 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
//------------------------------------------------------------------------------
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
// that's ok since the histogram values are less than 1<<28 (max picture size).
#define LINE_SIZE 16 // 8 or 16
static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
int size) {
int i;
assert(size % LINE_SIZE == 0);
for (i = 0; i < size; i += LINE_SIZE) {
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
#if (LINE_SIZE == 16)
@ -195,12 +196,14 @@ static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
#endif
}
for (; i < size; ++i) {
out[i] = a[i] + b[i];
}
}
static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
int i;
assert(size % LINE_SIZE == 0);
for (i = 0; i < size; i += LINE_SIZE) {
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
#if (LINE_SIZE == 16)
@ -220,35 +223,11 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
#endif
}
}
#undef LINE_SIZE
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
// that's ok since the histogram values are less than 1<<28 (max picture size).
static void HistogramAdd_SSE2(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out) {
int i;
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_);
if (b != out) {
AddVector_SSE2(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
AddVector_SSE2(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
AddVector_SSE2(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
AddVector_SSE2(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
} else {
AddVectorEq_SSE2(a->literal_, out->literal_, NUM_LITERAL_CODES);
AddVectorEq_SSE2(a->red_, out->red_, NUM_LITERAL_CODES);
AddVectorEq_SSE2(a->blue_, out->blue_, NUM_LITERAL_CODES);
AddVectorEq_SSE2(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
}
for (i = NUM_LITERAL_CODES; i < literal_size; ++i) {
out->literal_[i] = a->literal_[i] + b->literal_[i];
}
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
out->distance_[i] = a->distance_[i] + b->distance_[i];
for (; i < size; ++i) {
out[i] += a[i];
}
}
#undef LINE_SIZE
//------------------------------------------------------------------------------
// Entropy
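The revised bound is the usual main-loop-plus-scalar-tail pattern, so sizes that are not a multiple of LINE_SIZE are now handled instead of asserted away; schematically (hypothetical helper, with plain C standing in for the SIMD body):

#define LINE_SIZE 16  /* same value as above */
static void AddWithTail(const uint32_t* a, const uint32_t* b,
                        uint32_t* out, int size) {
  int i, j;
  for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
    /* stands in for the SIMD body over elements [i, i + LINE_SIZE) */
    for (j = 0; j < LINE_SIZE; ++j) out[i + j] = a[i + j] + b[i + j];
  }
  for (; i < size; ++i) out[i] = a[i] + b[i];  /* scalar tail */
}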
@ -675,7 +654,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
VP8LTransformColor = TransformColor_SSE2;
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE2;
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2;
VP8LHistogramAdd = HistogramAdd_SSE2;
VP8LAddVector = AddVector_SSE2;
VP8LAddVectorEq = AddVectorEq_SSE2;
VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2;
VP8LVectorMismatch = VectorMismatch_SSE2;
VP8LBundleColorMap = BundleColorMap_SSE2;

@ -1389,4 +1389,4 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
} while (0)
#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
#endif /* WEBP_DSP_MSA_MACRO_H_ */
#endif // WEBP_DSP_MSA_MACRO_H_

@ -0,0 +1,70 @@
// Copyright 2018 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
#ifndef WEBP_DSP_QUANT_H_
#define WEBP_DSP_QUANT_H_
#include "src/dsp/dsp.h"
#include "src/webp/types.h"
#if defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) && \
!defined(WEBP_HAVE_NEON_RTCD)
#include <arm_neon.h>
#define IsFlat IsFlat_NEON
static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
const uint64x2_t b = vpaddlq_u32(a);
return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
vreinterpret_u32_u64(vget_high_u64(b)));
}
static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
int thresh) {
const int16x8_t tst_ones = vdupq_n_s16(-1);
uint32x4_t sum = vdupq_n_u32(0);
for (int i = 0; i < num_blocks; ++i) {
// Set DC to zero.
const int16x8_t a_0 = vsetq_lane_s16(0, vld1q_s16(levels), 0);
const int16x8_t a_1 = vld1q_s16(levels + 8);
const uint16x8_t b_0 = vshrq_n_u16(vtstq_s16(a_0, tst_ones), 15);
const uint16x8_t b_1 = vshrq_n_u16(vtstq_s16(a_1, tst_ones), 15);
sum = vpadalq_u16(sum, b_0);
sum = vpadalq_u16(sum, b_1);
levels += 16;
}
return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0);
}
#else
#define IsFlat IsFlat_C
static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
int thresh) {
int score = 0;
while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
int i;
for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
score += (levels[i] != 0);
if (score > thresh) return 0;
}
levels += 16;
}
return 1;
}
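// Both variants answer the same question (summary note, not upstream text):
// is the total number of non-zero AC levels across all blocks at most
// 'thresh'? The NEON path zeroes each block's DC lane, counts every non-zero
// level and compares once at the end; the C path accumulates the same count
// but returns early as soon as it exceeds 'thresh'.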
#endif // defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) &&
// !defined(WEBP_HAVE_NEON_RTCD)
#endif // WEBP_DSP_QUANT_H_

@ -21,6 +21,7 @@
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
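// Worked example (illustrative, assuming WEBP_RESCALER_RFIX == 32, hence
// ROUNDER == 1u << 31): for x * y == 0x180000000, MULT_FIX yields
// (0x180000000 + 0x80000000) >> 32 == 2 (round half up), while
// MULT_FIX_FLOOR yields 0x180000000 >> 32 == 1 (plain truncation).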
//------------------------------------------------------------------------------
// Row import
@ -138,7 +139,7 @@ void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
if (yscale) {
for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale);
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = frac; // new fractional start
@ -153,6 +154,7 @@ void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
}
}
#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER

@ -209,6 +209,7 @@ static void ExportRowExpand_MIPS32(WebPRescaler* const wrk) {
}
}
#if 0 // disabled for now. TODO(skal): make match the C-code
static void ExportRowShrink_MIPS32(WebPRescaler* const wrk) {
const int x_out_max = wrk->dst_width * wrk->num_channels;
uint8_t* dst = wrk->dst;
@ -273,6 +274,7 @@ static void ExportRowShrink_MIPS32(WebPRescaler* const wrk) {
);
}
}
#endif // 0
//------------------------------------------------------------------------------
// Entry point
@ -283,7 +285,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) {
WebPRescalerImportRowExpand = ImportRowExpand_MIPS32;
WebPRescalerImportRowShrink = ImportRowShrink_MIPS32;
WebPRescalerExportRowExpand = ExportRowExpand_MIPS32;
WebPRescalerExportRowShrink = ExportRowShrink_MIPS32;
// WebPRescalerExportRowShrink = ExportRowShrink_MIPS32;
}
#else // !WEBP_USE_MIPS32

@ -20,10 +20,12 @@
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
//------------------------------------------------------------------------------
// Row export
#if 0 // disabled for now. TODO(skal): make match the C-code
static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
int i;
const int x_out_max = wrk->dst_width * wrk->num_channels;
@ -106,7 +108,7 @@ static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
}
for (i = 0; i < (x_out_max & 0x3); ++i) {
const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
const int v = (int)MULT_FIX_FLOOR(*irow - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255);
*dst++ = v;
*irow++ = frac; // new fractional start
@ -154,13 +156,14 @@ static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
);
}
for (i = 0; i < (x_out_max & 0x3); ++i) {
const int v = (int)MULT_FIX(*irow, wrk->fxy_scale);
const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
assert(v >= 0 && v <= 255);
*dst++ = v;
*irow++ = 0;
}
}
}
#endif // 0
static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
int i;
@ -294,6 +297,7 @@ static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
}
}
#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER
@ -304,7 +308,7 @@ extern void WebPRescalerDspInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
// WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
}
#else // !WEBP_USE_MIPS_DSP_R2

@ -22,6 +22,7 @@
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
#define CALC_MULT_FIX_16(in0, in1, in2, in3, scale, shift, dst) do { \
v4u32 tmp0, tmp1, tmp2, tmp3; \
@ -262,6 +263,7 @@ static void RescalerExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
}
}
#if 0 // disabled for now. TODO(skal): make match the C-code
static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
uint8_t* dst, int length,
const uint32_t yscale,
@ -341,7 +343,7 @@ static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
}
for (x_out = 0; x_out < length; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale);
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = frac;
@ -426,6 +428,7 @@ static void RescalerExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
ExportRowShrink_1(irow, dst, x_out_max, wrk);
}
}
#endif // 0
//------------------------------------------------------------------------------
// Entry point
@ -434,7 +437,7 @@ extern void WebPRescalerDspInitMSA(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMSA(void) {
WebPRescalerExportRowExpand = RescalerExportRowExpand_MIPSdspR2;
WebPRescalerExportRowShrink = RescalerExportRowShrink_MIPSdspR2;
// WebPRescalerExportRowShrink = RescalerExportRowShrink_MIPSdspR2;
}
#else // !WEBP_USE_MSA

@ -22,6 +22,7 @@
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
#define MULT_FIX_C(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
#define MULT_FIX_FLOOR_C(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
#define LOAD_32x4(SRC, DST) const uint32x4_t DST = vld1q_u32((SRC))
#define LOAD_32x8(SRC, DST0, DST1) \
@ -35,8 +36,11 @@
#if (WEBP_RESCALER_RFIX == 32)
#define MAKE_HALF_CST(C) vdupq_n_s32((int32_t)((C) >> 1))
#define MULT_FIX(A, B) /* note: B is actually scale>>1. See MAKE_HALF_CST */ \
// note: B is actually scale>>1. See MAKE_HALF_CST
#define MULT_FIX(A, B) \
vreinterpretq_u32_s32(vqrdmulhq_s32(vreinterpretq_s32_u32((A)), (B)))
#define MULT_FIX_FLOOR(A, B) \
vreinterpretq_u32_s32(vqdmulhq_s32(vreinterpretq_s32_u32((A)), (B)))
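// Why the halved scale works (illustrative note, not upstream text):
// vqrdmulhq_s32(a, b) computes roughly (2 * a * b + (1 << 31)) >> 32, so with
// b == scale >> 1 it reproduces MULT_FIX's (a * scale + ROUNDER) >> 32 for
// WEBP_RESCALER_RFIX == 32; vqdmulhq_s32 omits the (1 << 31) rounding term,
// which is exactly MULT_FIX_FLOOR.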
#else
#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
#endif
@ -135,8 +139,8 @@ static void RescalerExportRowShrink_NEON(WebPRescaler* const wrk) {
const uint32x4_t A1 = MULT_FIX(in1, yscale_half);
const uint32x4_t B0 = vqsubq_u32(in2, A0);
const uint32x4_t B1 = vqsubq_u32(in3, A1);
const uint32x4_t C0 = MULT_FIX(B0, fxy_scale_half);
const uint32x4_t C1 = MULT_FIX(B1, fxy_scale_half);
const uint32x4_t C0 = MULT_FIX_FLOOR(B0, fxy_scale_half);
const uint32x4_t C1 = MULT_FIX_FLOOR(B1, fxy_scale_half);
const uint16x4_t D0 = vmovn_u32(C0);
const uint16x4_t D1 = vmovn_u32(C1);
const uint8x8_t E = vmovn_u16(vcombine_u16(D0, D1));
@ -145,7 +149,7 @@ static void RescalerExportRowShrink_NEON(WebPRescaler* const wrk) {
}
for (; x_out < x_out_max; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX_C(frow[x_out], yscale);
const int v = (int)MULT_FIX_C(irow[x_out] - frac, wrk->fxy_scale);
const int v = (int)MULT_FIX_FLOOR_C(irow[x_out] - frac, fxy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = frac; // new fractional start
@ -170,6 +174,12 @@ static void RescalerExportRowShrink_NEON(WebPRescaler* const wrk) {
}
}
#undef MULT_FIX_FLOOR_C
#undef MULT_FIX_C
#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER
//------------------------------------------------------------------------------
extern void WebPRescalerDspInitNEON(void);

@ -25,6 +25,7 @@
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
// input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0
static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) {
@ -224,6 +225,35 @@ static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
_mm_storel_epi64((__m128i*)dst, G);
}
static WEBP_INLINE void ProcessRow_Floor_SSE2(const __m128i* const A0,
const __m128i* const A1,
const __m128i* const A2,
const __m128i* const A3,
const __m128i* const mult,
uint8_t* const dst) {
const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
const __m128i B0 = _mm_mul_epu32(*A0, *mult);
const __m128i B1 = _mm_mul_epu32(*A1, *mult);
const __m128i B2 = _mm_mul_epu32(*A2, *mult);
const __m128i B3 = _mm_mul_epu32(*A3, *mult);
const __m128i D0 = _mm_srli_epi64(B0, WEBP_RESCALER_RFIX);
const __m128i D1 = _mm_srli_epi64(B1, WEBP_RESCALER_RFIX);
#if (WEBP_RESCALER_RFIX < 32)
const __m128i D2 =
_mm_and_si128(_mm_slli_epi64(B2, 32 - WEBP_RESCALER_RFIX), mask);
const __m128i D3 =
_mm_and_si128(_mm_slli_epi64(B3, 32 - WEBP_RESCALER_RFIX), mask);
#else
const __m128i D2 = _mm_and_si128(B2, mask);
const __m128i D3 = _mm_and_si128(B3, mask);
#endif
const __m128i E0 = _mm_or_si128(D0, D2);
const __m128i E1 = _mm_or_si128(D1, D3);
const __m128i F = _mm_packs_epi32(E0, E1);
const __m128i G = _mm_packus_epi16(F, F);
_mm_storel_epi64((__m128i*)dst, G);
}
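// (Note, not upstream text: ProcessRow_Floor_SSE2 mirrors ProcessRow_SSE2
// above it but skips the pre-shift rounder addition, so products are
// truncated rather than rounded, the SIMD counterpart of MULT_FIX_FLOOR.)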
static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
int x_out;
uint8_t* const dst = wrk->dst;
@ -322,12 +352,12 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
const __m128i G1 = _mm_or_si128(D1, F3);
_mm_storeu_si128((__m128i*)(irow + x_out + 0), G0);
_mm_storeu_si128((__m128i*)(irow + x_out + 4), G1);
ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
ProcessRow_Floor_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
}
}
for (; x_out < x_out_max; ++x_out) {
const uint32_t frac = (int)MULT_FIX(frow[x_out], yscale);
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = frac; // new fractional start
@ -352,6 +382,7 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
}
}
#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER

@ -207,4 +207,4 @@ static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
} // extern "C"
#endif
#endif /* WEBP_DSP_YUV_H_ */
#endif // WEBP_DSP_YUV_H_

@ -458,7 +458,7 @@ static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
dst->uv_alpha += src->uv_alpha;
}
// initialize the job struct with some TODOs
// initialize the job struct with some tasks to perform
static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
int start_row, int end_row) {
WebPGetWorkerInterface()->Init(&job->worker);

@ -67,7 +67,7 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
// The following code is similar to VP8LHistogramCreate but converts the
// distance to plane code.
VP8LHistogramInit(histo, cache_bits);
VP8LHistogramInit(histo, cache_bits, /*init_arrays=*/ 1);
while (VP8LRefsCursorOk(&c)) {
VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos, VP8LDistanceToPlaneCode,
xsize);

@ -715,6 +715,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
for (i = 0; i <= cache_bits_max; ++i) {
histos[i] = VP8LAllocateHistogram(i);
if (histos[i] == NULL) goto Error;
VP8LHistogramInit(histos[i], i, /*init_arrays=*/ 1);
if (i == 0) continue;
cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
if (!cc_init[i]) goto Error;

@ -79,4 +79,4 @@ extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
} // extern "C"
#endif
#endif /* WEBP_ENC_COST_ENC_H_ */
#endif // WEBP_ENC_COST_ENC_H_

@ -51,10 +51,12 @@ static void HistogramCopy(const VP8LHistogram* const src,
VP8LHistogram* const dst) {
uint32_t* const dst_literal = dst->literal_;
const int dst_cache_bits = dst->palette_code_bits_;
const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);
const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
assert(src->palette_code_bits_ == dst_cache_bits);
memcpy(dst, src, histo_size);
dst->literal_ = dst_literal;
memcpy(dst->literal_, src->literal_, literal_size * sizeof(*dst->literal_));
}
int VP8LGetHistogramSize(int cache_bits) {
@ -91,9 +93,19 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
VP8LHistogramStoreRefs(refs, p);
}
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
int init_arrays) {
p->palette_code_bits_ = palette_code_bits;
HistogramClear(p);
if (init_arrays) {
HistogramClear(p);
} else {
p->trivial_symbol_ = 0;
p->bit_cost_ = 0.;
p->literal_cost_ = 0.;
p->red_cost_ = 0.;
p->blue_cost_ = 0.;
memset(p->is_used_, 0, sizeof(p->is_used_));
}
}
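// (Usage note, inferred from the callers in this patch: init_arrays == 0 is
// passed where the count arrays are about to be cleared or overwritten
// anyway, e.g. VP8LAllocateHistogram / VP8LAllocateHistogramSet, while
// callers that accumulate into the histogram immediately, like
// CostModelBuild, pass 1.)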
VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
@ -104,37 +116,84 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
histo = (VP8LHistogram*)memory;
// literal_ won't necessarily be aligned.
histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
VP8LHistogramInit(histo, cache_bits);
VP8LHistogramInit(histo, cache_bits, /*init_arrays=*/ 0);
return histo;
}
// Resets the pointers of the histograms to point to the bit buffer in the set.
static void HistogramSetResetPointers(VP8LHistogramSet* const set,
int cache_bits) {
int i;
const int histo_size = VP8LGetHistogramSize(cache_bits);
uint8_t* memory = (uint8_t*) (set->histograms);
memory += set->max_size * sizeof(*set->histograms);
for (i = 0; i < set->max_size; ++i) {
memory = (uint8_t*) WEBP_ALIGN(memory);
set->histograms[i] = (VP8LHistogram*) memory;
// literal_ won't necessarily be aligned.
set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
memory += histo_size;
}
}
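// Memory layout assumed by the helper above (sketch, not upstream text):
//   [VP8LHistogramSet][max_size VP8LHistogram* slots]
//   [align][VP8LHistogram 0][literal_ 0] ... [align][VP8LHistogram N][literal_ N]
// Each literal_ array is carved out right behind its struct, so the pointers
// must be re-derived after any memset/memcpy of the whole block.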
// Returns the total size of the VP8LHistogramSet.
static size_t HistogramSetTotalSize(int size, int cache_bits) {
const int histo_size = VP8LGetHistogramSize(cache_bits);
return (sizeof(VP8LHistogramSet) + size * (sizeof(VP8LHistogram*) +
histo_size + WEBP_ALIGN_CST));
}
VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
int i;
VP8LHistogramSet* set;
const int histo_size = VP8LGetHistogramSize(cache_bits);
const size_t total_size =
sizeof(*set) + size * (sizeof(*set->histograms) +
histo_size + WEBP_ALIGN_CST);
const size_t total_size = HistogramSetTotalSize(size, cache_bits);
uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
if (memory == NULL) return NULL;
set = (VP8LHistogramSet*)memory;
memory += sizeof(*set);
set->histograms = (VP8LHistogram**)memory;
memory += size * sizeof(*set->histograms);
set->max_size = size;
set->size = size;
HistogramSetResetPointers(set, cache_bits);
for (i = 0; i < size; ++i) {
memory = (uint8_t*)WEBP_ALIGN(memory);
set->histograms[i] = (VP8LHistogram*)memory;
// literal_ won't necessarily be aligned.
set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
VP8LHistogramInit(set->histograms[i], cache_bits);
memory += histo_size;
VP8LHistogramInit(set->histograms[i], cache_bits, /*init_arrays=*/ 0);
}
return set;
}
void VP8LHistogramSetClear(VP8LHistogramSet* const set) {
int i;
const int cache_bits = set->histograms[0]->palette_code_bits_;
const int size = set->max_size;
const size_t total_size = HistogramSetTotalSize(size, cache_bits);
uint8_t* memory = (uint8_t*)set;
memset(memory, 0, total_size);
memory += sizeof(*set);
set->histograms = (VP8LHistogram**)memory;
set->max_size = size;
set->size = size;
HistogramSetResetPointers(set, cache_bits);
for (i = 0; i < size; ++i) {
set->histograms[i]->palette_code_bits_ = cache_bits;
}
}
// Removes the histogram 'i' from 'set' by setting it to NULL.
static void HistogramSetRemoveHistogram(VP8LHistogramSet* const set, int i,
int* const num_used) {
assert(set->histograms[i] != NULL);
set->histograms[i] = NULL;
--*num_used;
// If we remove the last valid one, shrink until the next valid one.
if (i == set->size - 1) {
while (set->size >= 1 && set->histograms[set->size - 1] == NULL) {
--set->size;
}
}
}
// -----------------------------------------------------------------------------
void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
@ -237,7 +296,8 @@ static double FinalHuffmanCost(const VP8LStreaks* const stats) {
// Get the symbol entropy for the distribution 'population'.
// Set 'trivial_sym', if there's only one symbol present in the distribution.
static double PopulationCost(const uint32_t* const population, int length,
uint32_t* const trivial_sym) {
uint32_t* const trivial_sym,
uint8_t* const is_used) {
VP8LBitEntropy bit_entropy;
VP8LStreaks stats;
VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
@ -245,6 +305,8 @@ static double PopulationCost(const uint32_t* const population, int length,
*trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code
: VP8L_NON_TRIVIAL_SYM;
}
// The histogram is used if there is at least one non-zero streak.
*is_used = (stats.streaks[1][0] != 0 || stats.streaks[1][1] != 0);
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
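// (Clarifying note: stats.streaks[1][0] and stats.streaks[1][1] accumulate
// the lengths of short and long (> 3) runs of non-zero counts respectively,
// so is_used ends up 0 exactly when every bucket of 'population' is zero.)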
@ -253,7 +315,9 @@ static double PopulationCost(const uint32_t* const population, int length,
// non-zero: both the zero-th one, or both the last one.
static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
const uint32_t* const Y,
int length, int trivial_at_end) {
int length, int is_X_used,
int is_Y_used,
int trivial_at_end) {
VP8LStreaks stats;
if (trivial_at_end) {
// This configuration is due to palettization that transforms an indexed
@ -262,28 +326,43 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
// Only FinalHuffmanCost needs to be evaluated.
memset(&stats, 0, sizeof(stats));
// Deal with the non-zero value at index 0 or length-1.
stats.streaks[1][0] += 1;
stats.streaks[1][0] = 1;
// Deal with the following/previous zero streak.
stats.counts[0] += 1;
stats.streaks[0][1] += length - 1;
stats.counts[0] = 1;
stats.streaks[0][1] = length - 1;
return FinalHuffmanCost(&stats);
} else {
VP8LBitEntropy bit_entropy;
VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
if (is_X_used) {
if (is_Y_used) {
VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
} else {
VP8LGetEntropyUnrefined(X, length, &bit_entropy, &stats);
}
} else {
if (is_Y_used) {
VP8LGetEntropyUnrefined(Y, length, &bit_entropy, &stats);
} else {
memset(&stats, 0, sizeof(stats));
stats.counts[0] = 1;
stats.streaks[0][length > 3] = length;
VP8LBitEntropyInit(&bit_entropy);
}
}
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
}
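// (Sanity sketch for the new unused-histogram shortcut, not upstream text:
// when neither X nor Y is used, the combined array is all zeros, so the
// stats reduce to one zero-valued streak covering 'length' entries,
// counts[0] == 1 and streaks[0][length > 3] == length, with an empty entropy
// term; that is precisely what the memset branch above constructs.)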
// Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
return
PopulationCost(
p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
NULL, &p->is_used_[0])
+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1])
+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2])
+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3])
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4])
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
}
@ -299,7 +378,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
int trivial_at_end = 0;
assert(a->palette_code_bits_ == b->palette_code_bits_);
*cost += GetCombinedEntropy(a->literal_, b->literal_,
VP8LHistogramNumCodes(palette_code_bits), 0);
VP8LHistogramNumCodes(palette_code_bits),
a->is_used_[0], b->is_used_[0], 0);
*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
b->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES);
@ -319,19 +399,23 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
}
*cost +=
GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end);
GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, a->is_used_[1],
b->is_used_[1], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end);
GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, a->is_used_[2],
b->is_used_[2], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
trivial_at_end);
*cost +=
GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
a->is_used_[3], b->is_used_[3], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, 0);
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
a->is_used_[4], b->is_used_[4], 0);
*cost +=
VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0;
@ -377,7 +461,9 @@ static double HistogramAddEval(const VP8LHistogram* const a,
static double HistogramAddThresh(const VP8LHistogram* const a,
const VP8LHistogram* const b,
double cost_threshold) {
double cost = -a->bit_cost_;
double cost;
assert(a != NULL && b != NULL);
cost = -a->bit_cost_;
GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
return cost;
}
@ -419,16 +505,19 @@ static void UpdateDominantCostRange(
static void UpdateHistogramCost(VP8LHistogram* const h) {
uint32_t alpha_sym, red_sym, blue_sym;
const double alpha_cost =
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
&h->is_used_[3]);
const double distance_cost =
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES);
h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
h->literal_cost_ =
PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
h->red_cost_ =
PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
h->blue_cost_ =
PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym, &h->is_used_[2]);
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
alpha_cost + distance_cost;
if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
@ -473,6 +562,7 @@ static void HistogramBuild(
VP8LHistogram** const histograms = image_histo->histograms;
VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);
assert(histo_bits > 0);
VP8LHistogramSetClear(image_histo);
while (VP8LRefsCursorOk(&c)) {
const PixOrCopy* const v = c.cur_pos;
const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
@ -487,17 +577,37 @@ static void HistogramBuild(
}
// Copies the histograms and computes their bit_cost.
static void HistogramCopyAndAnalyze(
VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) {
int i;
const int histo_size = orig_histo->size;
static const uint16_t kInvalidHistogramSymbol = (uint16_t)(-1);
static void HistogramCopyAndAnalyze(VP8LHistogramSet* const orig_histo,
VP8LHistogramSet* const image_histo,
int* const num_used,
uint16_t* const histogram_symbols) {
int i, cluster_id;
int num_used_orig = *num_used;
VP8LHistogram** const orig_histograms = orig_histo->histograms;
VP8LHistogram** const histograms = image_histo->histograms;
for (i = 0; i < histo_size; ++i) {
assert(image_histo->max_size == orig_histo->max_size);
for (cluster_id = 0, i = 0; i < orig_histo->max_size; ++i) {
VP8LHistogram* const histo = orig_histograms[i];
UpdateHistogramCost(histo);
// Copy histograms from orig_histo[] to image_histo[].
HistogramCopy(histo, histograms[i]);
// Skip the histogram if it is completely empty, which can happen for tiles
// with no information (when they are skipped because of LZ77).
if (!histo->is_used_[0] && !histo->is_used_[1] && !histo->is_used_[2]
&& !histo->is_used_[3] && !histo->is_used_[4]) {
// The first histogram is always used. If a histogram is empty, we set
// its id to be the same as the previous one: this will improve
// compressibility for later LZ77.
assert(i > 0);
HistogramSetRemoveHistogram(image_histo, i, num_used);
HistogramSetRemoveHistogram(orig_histo, i, &num_used_orig);
histogram_symbols[i] = kInvalidHistogramSymbol;
} else {
// Copy histograms from orig_histo[] to image_histo[].
HistogramCopy(histo, histograms[i]);
histogram_symbols[i] = cluster_id++;
assert(cluster_id <= image_histo->max_size);
}
}
}
@ -514,29 +624,33 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
// Analyze the dominant (literal, red and blue) entropy costs.
for (i = 0; i < histo_size; ++i) {
if (histograms[i] == NULL) continue;
UpdateDominantCostRange(histograms[i], &cost_range);
}
// bin-hash histograms on three of the dominant (literal, red and blue)
// symbol costs and store the resulting bin_id for each histogram.
for (i = 0; i < histo_size; ++i) {
// bin_map[i] is not set to a special value as its use will later be guarded
// by another (histograms[i] == NULL) check.
if (histograms[i] == NULL) continue;
bin_map[i] = GetHistoBinIndex(histograms[i], &cost_range, low_effort);
}
}
// Compact image_histo[] by merging some histograms with the same bin_id
// together if it's advantageous.
// Merges some histograms with the same bin_id together if it's advantageous.
// Sets the remaining histograms to NULL.
static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
int *num_used,
const uint16_t* const clusters,
uint16_t* const cluster_mappings,
VP8LHistogram* cur_combo,
const uint16_t* const bin_map,
int bin_map_size, int num_bins,
int num_bins,
double combine_cost_factor,
int low_effort) {
VP8LHistogram** const histograms = image_histo->histograms;
int idx;
// Work in-place: processed histograms are put at the beginning of
// image_histo[]. At the end, we just have to truncate the array.
int size = 0;
struct {
int16_t first; // position of the histogram that accumulates all
// histograms with the same bin_id
@ -549,16 +663,19 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
bin_info[idx].num_combine_failures = 0;
}
for (idx = 0; idx < bin_map_size; ++idx) {
const int bin_id = bin_map[idx];
const int first = bin_info[bin_id].first;
assert(size <= idx);
// By default, a cluster matches itself.
for (idx = 0; idx < *num_used; ++idx) cluster_mappings[idx] = idx;
for (idx = 0; idx < image_histo->size; ++idx) {
int bin_id, first;
if (histograms[idx] == NULL) continue;
bin_id = bin_map[idx];
first = bin_info[bin_id].first;
if (first == -1) {
// just move histogram #idx to its final position
histograms[size] = histograms[idx];
bin_info[bin_id].first = size++;
bin_info[bin_id].first = idx;
} else if (low_effort) {
HistogramAdd(histograms[idx], histograms[first], histograms[first]);
HistogramSetRemoveHistogram(image_histo, idx, num_used);
cluster_mappings[clusters[idx]] = clusters[first];
} else {
// try to merge #idx into #first (both share the same bin_id)
const double bit_cost = histograms[idx]->bit_cost_;
@ -581,19 +698,18 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
bin_info[bin_id].num_combine_failures >= max_combine_failures) {
// move the (better) merged histogram to its final slot
HistogramSwap(&cur_combo, &histograms[first]);
HistogramSetRemoveHistogram(image_histo, idx, num_used);
cluster_mappings[clusters[idx]] = clusters[first];
} else {
histograms[size++] = histograms[idx];
++bin_info[bin_id].num_combine_failures;
}
} else {
histograms[size++] = histograms[idx];
}
}
}
image_histo->size = size;
if (low_effort) {
// for low_effort case, update the final cost when everything is merged
for (idx = 0; idx < size; ++idx) {
for (idx = 0; idx < image_histo->size; ++idx) {
if (histograms[idx] == NULL) continue;
UpdateHistogramCost(histograms[idx]);
}
}
@ -624,16 +740,9 @@ typedef struct {
int max_size;
} HistoQueue;
static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {
static int HistoQueueInit(HistoQueue* const histo_queue, const int max_size) {
histo_queue->size = 0;
// max_index^2 for the queue size is safe. If you look at
// HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes
// data to the queue, you insert at most:
// - max_index*(max_index-1)/2 (the first two for loops)
// - max_index - 1 in the last for loop at the first iteration of the while
// loop, max_index - 2 at the second iteration ... therefore
// max_index*(max_index-1)/2 overall too
histo_queue->max_size = max_index * max_index;
histo_queue->max_size = max_size;
// We allocate max_size + 1 because the last element at index "size" is
// used as temporary data (and it could be up to max_size).
histo_queue->queue = (HistogramPair*)WebPSafeMalloc(
@ -674,6 +783,18 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
}
}
// Update the cost diff and combo of a pair of histograms. This needs to be
// called when the histograms have been merged with a third one.
static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
const VP8LHistogram* const h2,
double threshold,
HistogramPair* const pair) {
const double sum_cost = h1->bit_cost_ + h2->bit_cost_;
pair->cost_combo = 0.;
GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
pair->cost_diff = pair->cost_combo - sum_cost;
}
// Create a pair from indices "idx1" and "idx2" provided its cost
// is below "threshold", a negative entropy.
// It returns the cost of the pair, or 0. if it is above the threshold.
@ -683,8 +804,9 @@ static double HistoQueuePush(HistoQueue* const histo_queue,
const VP8LHistogram* h1;
const VP8LHistogram* h2;
HistogramPair pair;
double sum_cost;
// Stop here if the queue is full.
if (histo_queue->size == histo_queue->max_size) return 0.;
assert(threshold <= 0.);
if (idx1 > idx2) {
const int tmp = idx2;
@ -695,16 +817,12 @@ static double HistoQueuePush(HistoQueue* const histo_queue,
pair.idx2 = idx2;
h1 = histograms[idx1];
h2 = histograms[idx2];
sum_cost = h1->bit_cost_ + h2->bit_cost_;
pair.cost_combo = 0.;
GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair.cost_combo);
pair.cost_diff = pair.cost_combo - sum_cost;
HistoQueueUpdatePair(h1, h2, threshold, &pair);
// Do not even consider the pair if it does not improve the entropy.
if (pair.cost_diff >= threshold) return 0.;
// We cannot add more elements than the capacity.
assert(histo_queue->size < histo_queue->max_size);
histo_queue->queue[histo_queue->size++] = pair;
HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]);
@ -715,42 +833,43 @@ static double HistoQueuePush(HistoQueue* const histo_queue,
// Combines histograms by continuously choosing the one with the highest cost
// reduction.
static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
int* const num_used) {
int ok = 0;
int image_histo_size = image_histo->size;
const int image_histo_size = image_histo->size;
int i, j;
VP8LHistogram** const histograms = image_histo->histograms;
// Indexes of remaining histograms.
int* const clusters =
(int*)WebPSafeMalloc(image_histo_size, sizeof(*clusters));
// Priority queue of histogram pairs.
HistoQueue histo_queue;
if (!HistoQueueInit(&histo_queue, image_histo_size) || clusters == NULL) {
// image_histo_size^2 for the queue size is safe. If you look at
// HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes
// data to the queue, you insert at most:
// - image_histo_size*(image_histo_size-1)/2 (the first two for loops)
// - image_histo_size - 1 in the last for loop at the first iteration of
// the while loop, image_histo_size - 2 at the second iteration ...
// therefore image_histo_size*(image_histo_size-1)/2 overall too
if (!HistoQueueInit(&histo_queue, image_histo_size * image_histo_size)) {
goto End;
}
for (i = 0; i < image_histo_size; ++i) {
// Initialize clusters indexes.
clusters[i] = i;
if (image_histo->histograms[i] == NULL) continue;
for (j = i + 1; j < image_histo_size; ++j) {
// Initialize positions array.
// Initialize queue.
if (image_histo->histograms[j] == NULL) continue;
HistoQueuePush(&histo_queue, histograms, i, j, 0.);
}
}
while (image_histo_size > 1 && histo_queue.size > 0) {
while (histo_queue.size > 0) {
const int idx1 = histo_queue.queue[0].idx1;
const int idx2 = histo_queue.queue[0].idx2;
HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo;
// Remove merged histogram.
for (i = 0; i + 1 < image_histo_size; ++i) {
if (clusters[i] >= idx2) {
clusters[i] = clusters[i + 1];
}
}
--image_histo_size;
HistogramSetRemoveHistogram(image_histo, idx2, num_used);
// Remove pairs intersecting the just combined best pair.
for (i = 0; i < histo_queue.size;) {
@ -765,24 +884,15 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
}
// Push new pairs formed with combined histogram to the queue.
for (i = 0; i < image_histo_size; ++i) {
if (clusters[i] != idx1) {
HistoQueuePush(&histo_queue, histograms, idx1, clusters[i], 0.);
}
}
}
// Move remaining histograms to the beginning of the array.
for (i = 0; i < image_histo_size; ++i) {
if (i != clusters[i]) { // swap the two histograms
HistogramSwap(&histograms[i], &histograms[clusters[i]]);
for (i = 0; i < image_histo->size; ++i) {
if (i == idx1 || image_histo->histograms[i] == NULL) continue;
HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0.);
}
}
image_histo->size = image_histo_size;
ok = 1;
End:
WebPSafeFree(clusters);
HistoQueueClear(&histo_queue);
return ok;
}
@ -790,47 +900,69 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
// Perform histogram aggregation using a stochastic approach.
// 'do_greedy' is set to 1 if a greedy approach needs to be performed
// afterwards, 0 otherwise.
static int PairComparison(const void* idx1, const void* idx2) {
// To be used with bsearch: <0 when *idx1<*idx2, >0 if >, 0 when ==.
return (*(int*) idx1 - *(int*) idx2);
}
static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
int min_cluster_size,
int* const num_used, int min_cluster_size,
int* const do_greedy) {
int iter;
int j, iter;
uint32_t seed = 1;
int tries_with_no_success = 0;
int image_histo_size = image_histo->size;
const int outer_iters = image_histo_size;
const int outer_iters = *num_used;
const int num_tries_no_success = outer_iters / 2;
VP8LHistogram** const histograms = image_histo->histograms;
// Priority queue of histogram pairs. Its size of "kCostHeapSizeSqrt"^2
// Priority queue of histogram pairs. Its size of 'kHistoQueueSize'
// impacts the quality of the compression and the speed: the smaller it is,
// the faster the compression but the worse the quality.
HistoQueue histo_queue;
const int kHistoQueueSizeSqrt = 3;
const int kHistoQueueSize = 9;
int ok = 0;
// mapping from an index in image_histo with no NULL histogram to the full
// blown image_histo.
int* mappings;
if (*num_used < min_cluster_size) {
*do_greedy = 1;
return 1;
}
if (!HistoQueueInit(&histo_queue, kHistoQueueSizeSqrt)) {
mappings = (int*) WebPSafeMalloc(*num_used, sizeof(*mappings));
if (mappings == NULL || !HistoQueueInit(&histo_queue, kHistoQueueSize)) {
goto End;
}
// Fill the initial mapping.
for (j = 0, iter = 0; iter < image_histo->size; ++iter) {
if (histograms[iter] == NULL) continue;
mappings[j++] = iter;
}
assert(j == *num_used);
// Collapse similar histograms in 'image_histo'.
++min_cluster_size;
for (iter = 0; iter < outer_iters && image_histo_size >= min_cluster_size &&
++tries_with_no_success < num_tries_no_success;
for (iter = 0;
iter < outer_iters && *num_used >= min_cluster_size &&
++tries_with_no_success < num_tries_no_success;
++iter) {
int* mapping_index;
double best_cost =
(histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff;
int best_idx1 = -1, best_idx2 = 1;
int j;
const uint32_t rand_range = (image_histo_size - 1) * image_histo_size;
// image_histo_size / 2 was chosen empirically. Less means faster but worse
const uint32_t rand_range = (*num_used - 1) * (*num_used);
// (*num_used) / 2 was chosen empirically. Less means faster but worse
// compression.
const int num_tries = image_histo_size / 2;
const int num_tries = (*num_used) / 2;
for (j = 0; j < num_tries; ++j) {
// Pick random samples.
for (j = 0; *num_used >= 2 && j < num_tries; ++j) {
double curr_cost;
// Choose two different histograms at random and try to combine them.
const uint32_t tmp = MyRand(&seed) % rand_range;
const uint32_t idx1 = tmp / (image_histo_size - 1);
uint32_t idx2 = tmp % (image_histo_size - 1);
uint32_t idx1 = tmp / (*num_used - 1);
uint32_t idx2 = tmp % (*num_used - 1);
if (idx2 >= idx1) ++idx2;
idx1 = mappings[idx1];
idx2 = mappings[idx2];
// Calculate cost reduction on combination.
curr_cost =
@ -843,18 +975,21 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
}
if (histo_queue.size == 0) continue;
// Merge the two best histograms.
// Get the best histograms.
best_idx1 = histo_queue.queue[0].idx1;
best_idx2 = histo_queue.queue[0].idx2;
assert(best_idx1 < best_idx2);
HistogramAddEval(histograms[best_idx1], histograms[best_idx2],
histograms[best_idx1], 0);
// Swap the best_idx2 histogram with the last one (which is now unused).
--image_histo_size;
if (best_idx2 != image_histo_size) {
HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);
}
histograms[image_histo_size] = NULL;
// Pop best_idx2 from mappings.
mapping_index = (int*) bsearch(&best_idx2, mappings, *num_used,
sizeof(best_idx2), &PairComparison);
assert(mapping_index != NULL);
memmove(mapping_index, mapping_index + 1, sizeof(*mapping_index) *
((*num_used) - (mapping_index - mappings) - 1));
// Merge the histograms and remove best_idx2 from the queue.
HistogramAdd(histograms[best_idx2], histograms[best_idx1],
histograms[best_idx1]);
histograms[best_idx1]->bit_cost_ = histo_queue.queue[0].cost_combo;
HistogramSetRemoveHistogram(image_histo, best_idx2, num_used);
// Parse the queue and update each pair that deals with best_idx1 or
// best_idx2.
for (j = 0; j < histo_queue.size;) {
@ -877,12 +1012,6 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
p->idx2 = best_idx1;
do_eval = 1;
}
if (p->idx2 == image_histo_size) {
// No need to re-evaluate here as it does not involve a pair
// containing best_idx1 or best_idx2.
p->idx2 = best_idx2;
}
assert(p->idx2 < image_histo_size);
// Make sure the index order is respected.
if (p->idx1 > p->idx2) {
const int tmp = p->idx2;
@ -891,8 +1020,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
}
if (do_eval) {
// Re-evaluate the cost of an updated pair.
GetCombinedHistogramEntropy(histograms[p->idx1], histograms[p->idx2], 0,
&p->cost_diff);
HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0., p);
if (p->cost_diff >= 0.) {
HistoQueuePopPair(&histo_queue, p);
continue;
@ -901,15 +1029,14 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
HistoQueueUpdateHead(&histo_queue, p);
++j;
}
tries_with_no_success = 0;
}
image_histo->size = image_histo_size;
*do_greedy = (image_histo->size <= min_cluster_size);
*do_greedy = (*num_used <= min_cluster_size);
ok = 1;
End:
HistoQueueClear(&histo_queue);
WebPSafeFree(mappings);
return ok;
}
@ -917,23 +1044,29 @@ End:
// Histogram refinement
// Find the best 'out' histogram for each of the 'in' histograms.
// At call-time, 'out' contains the histograms of the clusters.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
static void HistogramRemap(const VP8LHistogramSet* const in,
const VP8LHistogramSet* const out,
VP8LHistogramSet* const out,
uint16_t* const symbols) {
int i;
VP8LHistogram** const in_histo = in->histograms;
VP8LHistogram** const out_histo = out->histograms;
const int in_size = in->size;
const int in_size = out->max_size;
const int out_size = out->size;
if (out_size > 1) {
for (i = 0; i < in_size; ++i) {
int best_out = 0;
double best_bits = MAX_COST;
int k;
if (in_histo[i] == NULL) {
// Arbitrarily set to the previous value if unused to help future LZ77.
symbols[i] = symbols[i - 1];
continue;
}
for (k = 0; k < out_size; ++k) {
const double cur_bits =
HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
double cur_bits;
cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
if (k == 0 || cur_bits < best_bits) {
best_bits = cur_bits;
best_out = k;
@ -949,12 +1082,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
}
// Recompute each out based on raw and symbols.
for (i = 0; i < out_size; ++i) {
HistogramClear(out_histo[i]);
}
VP8LHistogramSetClear(out);
out->size = out_size;
for (i = 0; i < in_size; ++i) {
const int idx = symbols[i];
int idx;
if (in_histo[i] == NULL) continue;
idx = symbols[i];
HistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]);
}
}
@ -970,6 +1104,70 @@ static double GetCombineCostFactor(int histo_size, int quality) {
return combine_cost_factor;
}
// Given a HistogramSet 'set', the mapping of clusters 'cluster_mapping' and the
// current assignment of the cells in 'symbols', merge the clusters and
// assign the smallest possible cluster values.
static void OptimizeHistogramSymbols(const VP8LHistogramSet* const set,
uint16_t* const cluster_mappings,
int num_clusters,
uint16_t* const cluster_mappings_tmp,
uint16_t* const symbols) {
int i, cluster_max;
int do_continue = 1;
// First, assign the lowest cluster to each pixel.
while (do_continue) {
do_continue = 0;
for (i = 0; i < num_clusters; ++i) {
int k;
k = cluster_mappings[i];
while (k != cluster_mappings[k]) {
cluster_mappings[k] = cluster_mappings[cluster_mappings[k]];
k = cluster_mappings[k];
}
if (k != cluster_mappings[i]) {
do_continue = 1;
cluster_mappings[i] = k;
}
}
}
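// (Illustrative note: the loop above is union-find path compression.
// Each entry follows cluster_mappings[] until the fixed point
// k == cluster_mappings[k], so a chain like 3 -> 2 -> 0 collapses to
// 3 -> 0 and 2 -> 0; the outer while() repeats until no entry moves.)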
// Create a mapping from a cluster id to its minimal version.
cluster_max = 0;
memset(cluster_mappings_tmp, 0,
set->max_size * sizeof(*cluster_mappings_tmp));
assert(cluster_mappings[0] == 0);
// Re-map the ids.
for (i = 0; i < set->max_size; ++i) {
int cluster;
if (symbols[i] == kInvalidHistogramSymbol) continue;
cluster = cluster_mappings[symbols[i]];
assert(symbols[i] < num_clusters);
if (cluster > 0 && cluster_mappings_tmp[cluster] == 0) {
++cluster_max;
cluster_mappings_tmp[cluster] = cluster_max;
}
symbols[i] = cluster_mappings_tmp[cluster];
}
// Make sure all cluster values are used.
cluster_max = 0;
for (i = 0; i < set->max_size; ++i) {
if (symbols[i] == kInvalidHistogramSymbol) continue;
if (symbols[i] <= cluster_max) continue;
++cluster_max;
assert(symbols[i] == cluster_max);
}
}
static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) {
uint32_t size;
int i;
for (i = 0, size = 0; i < image_histo->size; ++i) {
if (image_histo->histograms[i] == NULL) continue;
image_histo->histograms[size++] = image_histo->histograms[i];
}
image_histo->size = size;
}
int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,
int quality, int low_effort,
@ -987,28 +1185,37 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// histograms of small sizes (as bin_map will be very sparse) and
// maximum quality q==100 (to preserve the compression gains at that level).
const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
const int entropy_combine =
(orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
if (orig_histo == NULL) goto Error;
int entropy_combine;
uint16_t* const map_tmp =
WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp));
uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size;
int num_used = image_histo_raw_size;
if (orig_histo == NULL || map_tmp == NULL) goto Error;
// Construct the histograms from backward references.
HistogramBuild(xsize, histo_bits, refs, orig_histo);
// Copies the histograms and computes their bit_cost.
HistogramCopyAndAnalyze(orig_histo, image_histo);
// histogram_symbols is filled in with initial cluster ids along the way.
HistogramCopyAndAnalyze(orig_histo, image_histo, &num_used,
histogram_symbols);
entropy_combine =
(num_used > entropy_combine_num_bins * 2) && (quality < 100);
if (entropy_combine) {
const int bin_map_size = orig_histo->size;
// Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.
uint16_t* const bin_map = histogram_symbols;
uint16_t* const bin_map = map_tmp;
const double combine_cost_factor =
GetCombineCostFactor(image_histo_raw_size, quality);
const uint32_t num_clusters = num_used;
HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
// Collapse histograms with similar entropy.
HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size,
HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols,
cluster_mappings, tmp_histo, bin_map,
entropy_combine_num_bins, combine_cost_factor,
low_effort);
OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters,
map_tmp, histogram_symbols);
}
// Don't combine the histograms using stochastic and greedy heuristics for
@ -1018,21 +1225,26 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// cubic ramp between 1 and MAX_HISTO_GREEDY:
const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
int do_greedy;
if (!HistogramCombineStochastic(image_histo, threshold_size, &do_greedy)) {
if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size,
&do_greedy)) {
goto Error;
}
if (do_greedy && !HistogramCombineGreedy(image_histo)) {
goto Error;
if (do_greedy) {
RemoveEmptyHistograms(image_histo);
if (!HistogramCombineGreedy(image_histo, &num_used)) {
goto Error;
}
}
}
// TODO(vrabaud): Optimize HistogramRemap for low-effort compression mode.
// Find the optimal map from original histograms to the final ones.
RemoveEmptyHistograms(image_histo);
HistogramRemap(orig_histo, image_histo, histogram_symbols);
ok = 1;
Error:
VP8LFreeHistogramSet(orig_histo);
WebPSafeFree(map_tmp);
return ok;
}

@ -44,6 +44,7 @@ typedef struct {
double literal_cost_; // Cached values of dominant entropy costs:
double red_cost_; // literal, red & blue.
double blue_cost_;
uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
} VP8LHistogram;
// Collection of histograms with fixed capacity, allocated as one
@ -67,7 +68,9 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
int VP8LGetHistogramSize(int palette_code_bits);
// Set the palette_code_bits and reset the stats.
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
// If init_arrays is true, the arrays are also filled with 0's.
void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
int init_arrays);
// Collect all the references into a histogram (without reset)
void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
@ -83,6 +86,9 @@ void VP8LFreeHistogramSet(VP8LHistogramSet* const histo);
// using 'cache_bits'. Return NULL in case of memory error.
VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
// Set the histograms in set to 0.
void VP8LHistogramSetClear(VP8LHistogramSet* const set);
// Allocate and initialize histogram object with specified 'cache_bits'.
// Returns NULL in case of memory error.
// Special case of VP8LAllocateHistogramSet, with size equals 1.
@ -113,7 +119,7 @@ double VP8LBitsEntropy(const uint32_t* const array, int n);
// Estimate how many bits the combined entropy of literals and distance
// approximately maps to.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
double VP8LHistogramEstimateBits(VP8LHistogram* const p);
#ifdef __cplusplus
}

@ -128,7 +128,7 @@ static void ImportLine(const uint8_t* src, int src_stride,
for (; i < total_len; ++i) dst[i] = dst[len - 1];
}
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) {
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
const VP8Encoder* const enc = it->enc_;
const int x = it->x_, y = it->y_;
const WebPPicture* const pic = enc->pic_;

@ -16,10 +16,6 @@
#include "src/enc/vp8i_enc.h"
#include "src/dsp/yuv.h"
static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
return (0xff000000u | (r << 16) | (g << 8) | b);
}
//------------------------------------------------------------------------------
// Helper: clean up fully transparent area to help compressibility.
@ -195,6 +191,10 @@ void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) {
#define BLEND_10BIT(V0, V1, ALPHA) \
((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101 + 1024) >> 18)
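// (Sanity check, illustrative: ALPHA here is the sum of four 8-bit alphas
// and spans [0, 1020]; with ALPHA == 1020 the expression reduces to
// (V1 * 1020 * 0x101 + 1024) >> 18 == V1, and with ALPHA == 0 it returns V0.)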
static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
return (0xff000000u | (r << 16) | (g << 8) | b);
}
void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
const int red = (background_rgb >> 16) & 0xff;
const int green = (background_rgb >> 8) & 0xff;
@ -208,39 +208,44 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
if (!has_alpha || pic->a == NULL) return; // nothing to do
uint8_t* y_ptr = pic->y;
uint8_t* u_ptr = pic->u;
uint8_t* v_ptr = pic->v;
uint8_t* a_ptr = pic->a;
if (!has_alpha || a_ptr == NULL) return; // nothing to do
for (y = 0; y < pic->height; ++y) {
// Luma blending
uint8_t* const y_ptr = pic->y + y * pic->y_stride;
uint8_t* const a_ptr = pic->a + y * pic->a_stride;
for (x = 0; x < pic->width; ++x) {
const int alpha = a_ptr[x];
const uint8_t alpha = a_ptr[x];
if (alpha < 0xff) {
y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]);
y_ptr[x] = BLEND(Y0, y_ptr[x], alpha);
}
}
// Chroma blending every even line
if ((y & 1) == 0) {
uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride;
uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride;
uint8_t* const a_ptr2 =
(y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
for (x = 0; x < uv_width; ++x) {
// Average four alpha values into a single blending weight.
// TODO(skal): might lead to visible contouring. Can we do better?
const int alpha =
const uint32_t alpha =
a_ptr[2 * x + 0] + a_ptr[2 * x + 1] +
a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1];
u[x] = BLEND_10BIT(U0, u[x], alpha);
v[x] = BLEND_10BIT(V0, v[x], alpha);
u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
}
if (pic->width & 1) { // rightmost pixel
const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
u[x] = BLEND_10BIT(U0, u[x], alpha);
v[x] = BLEND_10BIT(V0, v[x], alpha);
const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
}
} else {
u_ptr += pic->uv_stride;
v_ptr += pic->uv_stride;
}
memset(a_ptr, 0xff, pic->width);
memset(a_ptr, 0xff, pic->width); // reset alpha value to opaque
a_ptr += pic->a_stride;
y_ptr += pic->y_stride;
}
} else {
uint32_t* argb = pic->argb;

@ -177,12 +177,15 @@ static uint8_t NearLosslessComponent(uint8_t value, uint8_t predict,
}
}
static WEBP_INLINE uint8_t NearLosslessDiff(uint8_t a, uint8_t b) {
return (uint8_t)((((int)(a) - (int)(b))) & 0xff);
}
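// (Illustrative note: the & 0xff keeps the subtraction in modular 8-bit
// arithmetic, e.g. NearLosslessDiff(1, 255) == 2, the wrap-around distance,
// which is what the residual quantization below relies on.)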
// Quantize every component of the difference between the actual pixel value and
// its prediction to a multiple of a quantization (a power of 2, not larger than
// max_quantization which is a power of 2, smaller than max_diff). Take care if
// value and predict have undergone subtract green, which means that red and
// blue are represented as offsets from green.
#define NEAR_LOSSLESS_DIFF(a, b) (uint8_t)((((int)(a) - (int)(b))) & 0xff)
static uint32_t NearLossless(uint32_t value, uint32_t predict,
int max_quantization, int max_diff,
int used_subtract_green) {
@ -199,7 +202,7 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict,
}
if ((value >> 24) == 0 || (value >> 24) == 0xff) {
// Preserve transparency of fully transparent or fully opaque pixels.
a = NEAR_LOSSLESS_DIFF(value >> 24, predict >> 24);
a = NearLosslessDiff(value >> 24, predict >> 24);
} else {
a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization);
}
@ -212,16 +215,15 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict,
// The amount by which green has been adjusted during quantization. It is
// subtracted from red and blue for compensation, to avoid accumulating two
// quantization errors in them.
green_diff = NEAR_LOSSLESS_DIFF(new_green, value >> 8);
green_diff = NearLosslessDiff(new_green, value >> 8);
}
r = NearLosslessComponent(NEAR_LOSSLESS_DIFF(value >> 16, green_diff),
r = NearLosslessComponent(NearLosslessDiff(value >> 16, green_diff),
(predict >> 16) & 0xff, 0xff - new_green,
quantization);
b = NearLosslessComponent(NEAR_LOSSLESS_DIFF(value, green_diff),
b = NearLosslessComponent(NearLosslessDiff(value, green_diff),
predict & 0xff, 0xff - new_green, quantization);
return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
}
#undef NEAR_LOSSLESS_DIFF
#endif // (WEBP_NEAR_LOSSLESS == 1)
// Stores the difference between the pixel and its prediction in "out".

@ -15,6 +15,7 @@
#include <math.h>
#include <stdlib.h> // for abs()
#include "src/dsp/quant.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/cost_enc.h"
@ -977,19 +978,6 @@ static void SwapOut(VP8EncIterator* const it) {
SwapPtr(&it->yuv_out_, &it->yuv_out2_);
}
static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
score_t score = 0;
while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
int i;
for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
score += (levels[i] != 0);
if (score > thresh) return 0;
}
levels += 16;
}
return 1;
}
static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
const int kNumBlocks = 16;
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];

@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 1
#define ENC_MIN_VERSION 0
#define ENC_REV_VERSION 0
#define ENC_REV_VERSION 2
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
@ -278,7 +278,7 @@ int VP8IteratorIsDone(const VP8EncIterator* const it);
// Import uncompressed samples from source.
// If tmp_32 is not NULL, import boundary samples too.
// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32);
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32);
// export decimated samples
void VP8IteratorExport(const VP8EncIterator* const it);
// go to next macroblock. Returns false if not finished.
@ -515,4 +515,4 @@ void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
} // extern "C"
#endif
#endif /* WEBP_ENC_VP8I_ENC_H_ */
#endif // WEBP_ENC_VP8I_ENC_H_

@ -462,6 +462,7 @@ static int GetHuffBitLengthsAndCodes(
for (i = 0; i < histogram_image_size; ++i) {
const VP8LHistogram* const histo = histogram_image->histograms[i];
HuffmanTreeCode* const codes = &huffman_codes[5 * i];
assert(histo != NULL);
for (k = 0; k < 5; ++k) {
const int num_symbols =
(k == 0) ? VP8LHistogramNumCodes(histo->palette_code_bits_) :
@ -809,6 +810,7 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
VP8LHistogramSetClear(histogram_image);
// Build histogram image and symbols from backward references.
VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]);
@ -1248,14 +1250,20 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) {
const WebPPicture* const picture = enc->pic_;
const int width = picture->width;
const int height = picture->height;
int y;
err = AllocateTransformBuffer(enc, width, height);
if (err != VP8_ENC_OK) return err;
if (enc->argb_content_ == kEncoderARGB) return VP8_ENC_OK;
for (y = 0; y < height; ++y) {
memcpy(enc->argb_ + y * width,
picture->argb + y * picture->argb_stride,
width * sizeof(*enc->argb_));
{
uint32_t* dst = enc->argb_;
const uint32_t* src = picture->argb;
int y;
for (y = 0; y < height; ++y) {
memcpy(dst, src, width * sizeof(*dst));
dst += width;
src += picture->argb_stride;
}
}
enc->argb_content_ = kEncoderARGB;
assert(enc->current_width_ == width);

@ -115,4 +115,4 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
} // extern "C"
#endif
#endif /* WEBP_ENC_VP8LI_ENC_H_ */
#endif // WEBP_ENC_VP8LI_ENC_H_

@@ -40,4 +40,4 @@ int WebPAnimEncoderRefineRect(
} // extern "C"
#endif
#endif /* WEBP_MUX_ANIMI_H_ */
#endif // WEBP_MUX_ANIMI_H_

@@ -69,12 +69,12 @@ void WebPMuxDelete(WebPMux* mux) {
if (idx == (INDEX)) { \
err = ChunkAssignData(&chunk, data, copy_data, tag); \
if (err == WEBP_MUX_OK) { \
err = ChunkSetNth(&chunk, (LIST), nth); \
err = ChunkSetHead(&chunk, (LIST)); \
} \
return err; \
}
static WebPMuxError MuxSet(WebPMux* const mux, uint32_t tag, uint32_t nth,
static WebPMuxError MuxSet(WebPMux* const mux, uint32_t tag,
const WebPData* const data, int copy_data) {
WebPChunk chunk;
WebPMuxError err = WEBP_MUX_NOT_FOUND;
@@ -190,7 +190,7 @@ WebPMuxError WebPMuxSetChunk(WebPMux* mux, const char fourcc[4],
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Add the given chunk.
return MuxSet(mux, tag, 1, chunk_data, copy_data);
return MuxSet(mux, tag, chunk_data, copy_data);
}
// Creates a chunk from given 'data' and sets it as 1st chunk in 'chunk_list'.
@@ -202,7 +202,7 @@ static WebPMuxError AddDataToChunkList(
ChunkInit(&chunk);
err = ChunkAssignData(&chunk, data, copy_data, tag);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, chunk_list, 1);
err = ChunkSetHead(&chunk, chunk_list);
if (err != WEBP_MUX_OK) goto Err;
return WEBP_MUX_OK;
Err:
@@ -266,14 +266,14 @@ WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* info,
int copy_data) {
WebPMuxImage wpi;
WebPMuxError err;
const WebPData* const bitstream = &info->bitstream;
// Sanity checks.
if (mux == NULL || info == NULL) return WEBP_MUX_INVALID_ARGUMENT;
if (info->id != WEBP_CHUNK_ANMF) return WEBP_MUX_INVALID_ARGUMENT;
if (bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) {
if (info->bitstream.bytes == NULL ||
info->bitstream.size > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
@@ -287,7 +287,7 @@ WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* info,
}
MuxImageInit(&wpi);
err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
err = SetAlphaAndImageChunks(&info->bitstream, copy_data, &wpi);
if (err != WEBP_MUX_OK) goto Err;
assert(wpi.img_ != NULL); // As SetAlphaAndImageChunks() was successful.
@@ -342,7 +342,7 @@ WebPMuxError WebPMuxSetAnimationParams(WebPMux* mux,
// Set the animation parameters.
PutLE32(data, params->bgcolor);
PutLE16(data + 4, params->loop_count);
return MuxSet(mux, kChunks[IDX_ANIM].tag, 1, &anim, 1);
return MuxSet(mux, kChunks[IDX_ANIM].tag, &anim, 1);
}
WebPMuxError WebPMuxSetCanvasSize(WebPMux* mux,
@@ -540,7 +540,7 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
PutLE24(data + 4, width - 1); // canvas width.
PutLE24(data + 7, height - 1); // canvas height.
return MuxSet(mux, kChunks[IDX_VP8X].tag, 1, &vp8x, 1);
return MuxSet(mux, kChunks[IDX_VP8X].tag, &vp8x, 1);
}
// Cleans up 'mux' by removing any unnecessary chunks.

@@ -14,6 +14,7 @@
#ifndef WEBP_MUX_MUXI_H_
#define WEBP_MUX_MUXI_H_
#include <assert.h>
#include <stdlib.h>
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
@@ -28,7 +29,7 @@ extern "C" {
#define MUX_MAJ_VERSION 1
#define MUX_MIN_VERSION 0
#define MUX_REV_VERSION 0
#define MUX_REV_VERSION 2
// Chunk object.
typedef struct WebPChunk WebPChunk;
@@ -126,11 +127,14 @@ WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag);
WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
int copy_data, uint32_t tag);
// Sets 'chunk' at nth position in the 'chunk_list'.
// nth = 0 has the special meaning "last of the list".
// Sets 'chunk' as the only element in 'chunk_list' if it is empty.
// On success ownership is transferred from 'chunk' to the 'chunk_list'.
WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
uint32_t nth);
WebPMuxError ChunkSetHead(WebPChunk* const chunk, WebPChunk** const chunk_list);
// Sets 'chunk' at last position in the 'chunk_list'.
// On success ownership is transferred from 'chunk' to the 'chunk_list'.
// *chunk_list also points towards the last valid element of the initial
// *chunk_list.
WebPMuxError ChunkAppend(WebPChunk* const chunk, WebPChunk*** const chunk_list);
// Releases chunk and returns chunk->next_.
WebPChunk* ChunkRelease(WebPChunk* const chunk);
@@ -143,13 +147,13 @@ void ChunkListDelete(WebPChunk** const chunk_list);
// Returns size of the chunk including chunk header and padding byte (if any).
static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
assert(chunk_size <= MAX_CHUNK_PAYLOAD);
return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~1U);
}
// Size of a chunk including header and padding.
static WEBP_INLINE size_t ChunkDiskSize(const WebPChunk* chunk) {
const size_t data_size = chunk->data_.size;
assert(data_size < MAX_CHUNK_PAYLOAD);
return SizeWithPadding(data_size);
}
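
As a worked example of the padding rule, assuming CHUNK_HEADER_SIZE is 8 (a 4-byte tag plus a 4-byte size field): an odd payload gains one padding byte, so 5-byte and 6-byte payloads both occupy 14 bytes on disk. A self-checking sketch:

#include <assert.h>
#include <stddef.h>

#define CHUNK_HEADER_SIZE 8  /* assumption: 4-byte tag + 4-byte size field */

static size_t SizeWithPaddingDemo(size_t chunk_size) {
  return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~1U);
}

int main(void) {
  assert(SizeWithPaddingDemo(5) == 14);  /* 8 + (5 rounded up to 6) */
  assert(SizeWithPaddingDemo(6) == 14);  /* even size: no padding byte */
  return 0;
}
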
@@ -227,4 +231,4 @@ WebPMuxError MuxValidate(const WebPMux* const mux);
} // extern "C"
#endif
#endif /* WEBP_MUX_MUXI_H_ */
#endif // WEBP_MUX_MUXI_H_

@@ -111,27 +111,6 @@ WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
return ((nth > 0) && (iter > 0)) ? NULL : first;
}
// Outputs a pointer to 'prev_chunk->next_',
// where 'prev_chunk' is the pointer to the chunk at position (nth - 1).
// Returns true if nth chunk was found.
static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
WebPChunk*** const location) {
uint32_t count = 0;
assert(chunk_list != NULL);
*location = chunk_list;
while (*chunk_list != NULL) {
WebPChunk* const cur_chunk = *chunk_list;
++count;
if (count == nth) return 1; // Found.
chunk_list = &cur_chunk->next_;
*location = chunk_list;
}
// *chunk_list is ok to be NULL if adding at last location.
return (nth == 0 || (count == nth - 1)) ? 1 : 0;
}
//------------------------------------------------------------------------------
// Chunk writer methods.
@@ -156,11 +135,12 @@ WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
return WEBP_MUX_OK;
}
WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
uint32_t nth) {
WebPMuxError ChunkSetHead(WebPChunk* const chunk,
WebPChunk** const chunk_list) {
WebPChunk* new_chunk;
if (!ChunkSearchListToSet(chunk_list, nth, &chunk_list)) {
assert(chunk_list != NULL);
if (*chunk_list != NULL) {
return WEBP_MUX_NOT_FOUND;
}
@@ -168,11 +148,26 @@ WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
if (new_chunk == NULL) return WEBP_MUX_MEMORY_ERROR;
*new_chunk = *chunk;
chunk->owner_ = 0;
new_chunk->next_ = *chunk_list;
new_chunk->next_ = NULL;
*chunk_list = new_chunk;
return WEBP_MUX_OK;
}
WebPMuxError ChunkAppend(WebPChunk* const chunk,
WebPChunk*** const chunk_list) {
assert(chunk_list != NULL && *chunk_list != NULL);
if (**chunk_list == NULL) {
ChunkSetHead(chunk, *chunk_list);
} else {
WebPChunk* last_chunk = **chunk_list;
while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_;
ChunkSetHead(chunk, &last_chunk->next_);
*chunk_list = &last_chunk->next_;
}
return WEBP_MUX_OK;
}
//------------------------------------------------------------------------------
// Chunk deletion method(s).
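
The pair above replaces the positional ChunkSetNth() with two simpler operations: ChunkSetHead() only fills an empty list, and ChunkAppend() keeps a pointer to the tail so repeated appends need not rescan the whole list (the same idea reappears below as the chunk_list_ends[] cache in WebPMuxCreateInternal()). A self-contained toy sketch of the pattern, with a plain node type standing in for WebPChunk:

#include <stdio.h>
#include <stdlib.h>

typedef struct Node { int value; struct Node* next_; } Node;

/* Analogue of ChunkSetHead(): succeeds only on an empty list. */
static int SetHead(int value, Node** list) {
  Node* n;
  if (*list != NULL) return 0;
  n = (Node*)malloc(sizeof(*n));
  if (n == NULL) return 0;
  n->value = value;
  n->next_ = NULL;
  *list = n;
  return 1;
}

/* Analogue of ChunkAppend(): 'list_end' caches the tail position so a
 * later append resumes from there instead of the list head. */
static int Append(int value, Node*** list_end) {
  if (**list_end == NULL) return SetHead(value, *list_end);
  {
    Node* last = **list_end;
    while (last->next_ != NULL) last = last->next_;
    if (!SetHead(value, &last->next_)) return 0;
    *list_end = &last->next_;
  }
  return 1;
}

int main(void) {
  Node* list = NULL;
  Node** end = &list;
  const Node* p;
  Append(1, &end);
  Append(2, &end);
  Append(3, &end);  /* 'end' already points at the tail: no rescan needed */
  for (p = list; p != NULL; p = p->next_) printf("%d ", p->value);
  printf("\n");     /* prints: 1 2 3 */
  return 0;
}
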
@@ -232,9 +227,11 @@ void MuxImageInit(WebPMuxImage* const wpi) {
WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi) {
WebPMuxImage* next;
if (wpi == NULL) return NULL;
ChunkDelete(wpi->header_);
ChunkDelete(wpi->alpha_);
ChunkDelete(wpi->img_);
// There should be at most one chunk each in header_, alpha_ and img_,
// but we call ChunkListDelete() to be safe.
ChunkListDelete(&wpi->header_);
ChunkListDelete(&wpi->alpha_);
ChunkListDelete(&wpi->img_);
ChunkListDelete(&wpi->unknown_);
next = wpi->next_;

@@ -59,6 +59,7 @@ static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk,
// Sanity checks.
if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA;
chunk_size = GetLE32(data + TAG_SIZE);
if (chunk_size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_BAD_DATA;
{
const size_t chunk_disk_size = SizeWithPadding(chunk_size);
@@ -102,6 +103,7 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
const uint8_t* const last = bytes + size;
WebPChunk subchunk;
size_t subchunk_size;
WebPChunk** unknown_chunk_list = &wpi->unknown_;
ChunkInit(&subchunk);
assert(chunk->tag_ == kChunks[IDX_ANMF].tag);
@@ -116,7 +118,7 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
if (size < hdr_size) goto Fail;
ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_);
}
ChunkSetNth(&subchunk, &wpi->header_, 1);
ChunkSetHead(&subchunk, &wpi->header_);
wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks.
// Rest of the chunks.
@@ -133,18 +135,23 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
switch (ChunkGetIdFromTag(subchunk.tag_)) {
case WEBP_CHUNK_ALPHA:
if (wpi->alpha_ != NULL) goto Fail; // Consecutive ALPH chunks.
if (ChunkSetNth(&subchunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Fail;
if (ChunkSetHead(&subchunk, &wpi->alpha_) != WEBP_MUX_OK) goto Fail;
wpi->is_partial_ = 1; // Waiting for a VP8 chunk.
break;
case WEBP_CHUNK_IMAGE:
if (ChunkSetNth(&subchunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Fail;
if (wpi->img_ != NULL) goto Fail; // Only 1 image chunk allowed.
if (ChunkSetHead(&subchunk, &wpi->img_) != WEBP_MUX_OK) goto Fail;
if (!MuxImageFinalize(wpi)) goto Fail;
wpi->is_partial_ = 0; // wpi is completely filled.
break;
case WEBP_CHUNK_UNKNOWN:
if (wpi->is_partial_) goto Fail; // Encountered an unknown chunk
// before some image chunks.
if (ChunkSetNth(&subchunk, &wpi->unknown_, 0) != WEBP_MUX_OK) goto Fail;
if (wpi->is_partial_) {
goto Fail; // Encountered an unknown chunk
// before some image chunks.
}
if (ChunkAppend(&subchunk, &unknown_chunk_list) != WEBP_MUX_OK) {
goto Fail;
}
break;
default:
goto Fail;
@@ -175,6 +182,9 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
const uint8_t* data;
size_t size;
WebPChunk chunk;
// Stores the end of the chunk lists so that it is faster to append data to
// their ends.
WebPChunk** chunk_list_ends[WEBP_CHUNK_NIL + 1] = { NULL };
ChunkInit(&chunk);
// Sanity checks.
@@ -187,7 +197,7 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
size = bitstream->size;
if (data == NULL) return NULL;
if (size < RIFF_HEADER_SIZE) return NULL;
if (size < RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE) return NULL;
if (GetLE32(data + 0) != MKFOURCC('R', 'I', 'F', 'F') ||
GetLE32(data + CHUNK_HEADER_SIZE) != MKFOURCC('W', 'E', 'B', 'P')) {
return NULL;
@@ -196,8 +206,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
mux = WebPMuxNew();
if (mux == NULL) return NULL;
if (size < RIFF_HEADER_SIZE + TAG_SIZE) goto Err;
tag = GetLE32(data + RIFF_HEADER_SIZE);
if (tag != kChunks[IDX_VP8].tag &&
tag != kChunks[IDX_VP8L].tag &&
@@ -205,13 +213,17 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
goto Err; // First chunk should be VP8, VP8L or VP8X.
}
riff_size = SizeWithPadding(GetLE32(data + TAG_SIZE));
if (riff_size > MAX_CHUNK_PAYLOAD || riff_size > size) {
goto Err;
} else {
if (riff_size < size) { // Redundant data after last chunk.
size = riff_size; // To make sure we don't read any data beyond mux_size.
}
riff_size = GetLE32(data + TAG_SIZE);
if (riff_size > MAX_CHUNK_PAYLOAD) goto Err;
// Note this padding is historical and differs from demux.c which does not
// pad the file size.
riff_size = SizeWithPadding(riff_size);
if (riff_size < CHUNK_HEADER_SIZE) goto Err;
if (riff_size > size) goto Err;
// There's no point in reading past the end of the RIFF chunk.
if (size > riff_size + CHUNK_HEADER_SIZE) {
size = riff_size + CHUNK_HEADER_SIZE;
}
end = data + size;
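
As a worked example of the new bounds logic, assuming CHUNK_HEADER_SIZE is 8: for a 100-byte input whose RIFF size field reads 39, the value passes the MAX_CHUNK_PAYLOAD check, SizeWithPadding() turns it into 8 + 40 = 48, and since 48 fits within the input, reading is capped at 48 + 8 = 56 bytes; trailing garbage after the RIFF chunk is ignored rather than parsed.
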
@@ -226,7 +238,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
while (data != end) {
size_t data_size;
WebPChunkId id;
WebPChunk** chunk_list;
if (ChunkVerifyAndAssign(&chunk, data, size, riff_size,
copy_data) != WEBP_MUX_OK) {
goto Err;
@@ -236,11 +247,11 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
switch (id) {
case WEBP_CHUNK_ALPHA:
if (wpi->alpha_ != NULL) goto Err; // Consecutive ALPH chunks.
if (ChunkSetNth(&chunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Err;
if (ChunkSetHead(&chunk, &wpi->alpha_) != WEBP_MUX_OK) goto Err;
wpi->is_partial_ = 1; // Waiting for a VP8 chunk.
break;
case WEBP_CHUNK_IMAGE:
if (ChunkSetNth(&chunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Err;
if (ChunkSetHead(&chunk, &wpi->img_) != WEBP_MUX_OK) goto Err;
if (!MuxImageFinalize(wpi)) goto Err;
wpi->is_partial_ = 0; // wpi is completely filled.
PushImage:
@@ -257,9 +268,13 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
default: // A non-image chunk.
if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before
// getting all chunks of an image.
chunk_list = MuxGetChunkListFromId(mux, id); // List to add this chunk.
if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
if (chunk_list_ends[id] == NULL) {
chunk_list_ends[id] =
MuxGetChunkListFromId(mux, id); // List to add this chunk.
}
if (ChunkAppend(&chunk, &chunk_list_ends[id]) != WEBP_MUX_OK) goto Err;
if (id == WEBP_CHUNK_VP8X) { // grab global specs
if (data_size < CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE) goto Err;
mux->canvas_width_ = GetLE24(data + 12) + 1;
mux->canvas_height_ = GetLE24(data + 15) + 1;
}
@@ -385,6 +400,10 @@ static WebPMuxError SynthesizeBitstream(const WebPMuxImage* const wpi,
uint8_t* const data = (uint8_t*)WebPSafeMalloc(1ULL, size);
if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
// There should be at most one alpha_ chunk and exactly one img_ chunk.
assert(wpi->alpha_ == NULL || wpi->alpha_->next_ == NULL);
assert(wpi->img_ != NULL && wpi->img_->next_ == NULL);
// Main RIFF header.
dst = MuxEmitRiffHeader(data, size);

@@ -187,4 +187,4 @@ static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) {
} // extern "C"
#endif
#endif // WEBP_UTILS_BIT_READER_INL_UTILS_H_
#endif // WEBP_UTILS_BIT_READER_INL_UTILS_H_

@@ -172,4 +172,4 @@ static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
} // extern "C"
#endif
#endif /* WEBP_UTILS_BIT_READER_UTILS_H_ */
#endif // WEBP_UTILS_BIT_READER_UTILS_H_

@@ -248,6 +248,7 @@ int VP8LBitWriterClone(const VP8LBitWriter* const src,
dst->bits_ = src->bits_;
dst->used_ = src->used_;
dst->error_ = src->error_;
dst->cur_ = dst->buf_ + current_size;
return 1;
}

@@ -151,4 +151,4 @@ static WEBP_INLINE void VP8LPutBits(VP8LBitWriter* const bw,
} // extern "C"
#endif
#endif /* WEBP_UTILS_BIT_WRITER_UTILS_H_ */
#endif // WEBP_UTILS_BIT_WRITER_UTILS_H_

@@ -29,4 +29,4 @@ WEBP_FILTER_TYPE WebPEstimateBestFilter(const uint8_t* data,
} // extern "C"
#endif
#endif /* WEBP_UTILS_FILTERS_UTILS_H_ */
#endif // WEBP_UTILS_FILTERS_UTILS_H_

@@ -261,9 +261,15 @@ static void CleanupParams(SmoothParams* const p) {
int WebPDequantizeLevels(uint8_t* const data, int width, int height, int stride,
int strength) {
const int radius = 4 * strength / 100;
int radius = 4 * strength / 100;
if (strength < 0 || strength > 100) return 0;
if (data == NULL || width <= 0 || height <= 0) return 0; // bad params
// limit the filter size to not exceed the image dimensions
if (2 * radius + 1 > width) radius = (width - 1) >> 1;
if (2 * radius + 1 > height) radius = (height - 1) >> 1;
if (radius > 0) {
SmoothParams p;
memset(&p, 0, sizeof(p));
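
As a worked example of the new clamp: at strength 100 the nominal radius is 4 * 100 / 100 = 4, i.e. a 9-pixel filter window; on an image only 5 pixels wide, 2 * 4 + 1 > 5, so the radius becomes (5 - 1) >> 1 = 2 and the resulting 5-pixel window exactly fits, keeping the filter from spanning more than the image itself.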

@@ -32,4 +32,4 @@ int WebPDequantizeLevels(uint8_t* const data, int width, int height, int stride,
} // extern "C"
#endif
#endif /* WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_ */
#endif // WEBP_UTILS_QUANT_LEVELS_DEC_UTILS_H_

@@ -33,4 +33,4 @@ int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
} // extern "C"
#endif
#endif /* WEBP_UTILS_QUANT_LEVELS_UTILS_H_ */
#endif // WEBP_UTILS_QUANT_LEVELS_UTILS_H_

@@ -60,4 +60,4 @@ static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
} // extern "C"
#endif
#endif /* WEBP_UTILS_RANDOM_UTILS_H_ */
#endif // WEBP_UTILS_RANDOM_UTILS_H_

@@ -98,4 +98,4 @@ int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
} // extern "C"
#endif
#endif /* WEBP_UTILS_RESCALER_UTILS_H_ */
#endif // WEBP_UTILS_RESCALER_UTILS_H_

@@ -87,4 +87,4 @@ WEBP_EXTERN const WebPWorkerInterface* WebPGetWorkerInterface(void);
} // extern "C"
#endif
#endif /* WEBP_UTILS_THREAD_UTILS_H_ */
#endif // WEBP_UTILS_THREAD_UTILS_H_

@@ -107,19 +107,6 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
PutLE16(data + 2, (int)(val >> 16));
}
// Returns 31 ^ clz(n) = log2(n). This is the default C-implementation, either
// based on table or not. Can be used as fallback if clz() is not available.
#define WEBP_NEED_LOG_TABLE_8BIT
extern const uint8_t WebPLogTable8bit[256];
static WEBP_INLINE int WebPLog2FloorC(uint32_t n) {
int log_value = 0;
while (n >= 256) {
log_value += 8;
n >>= 8;
}
return log_value + WebPLogTable8bit[n];
}
// Returns (int)floor(log2(n)). n must be > 0.
// use GNU builtins where available.
#if defined(__GNUC__) && \
@@ -138,6 +125,19 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
return first_set_bit;
}
#else // default: use the C-version.
// Returns 31 ^ clz(n) = log2(n). This is the default C-implementation, either
// based on table or not. Can be used as fallback if clz() is not available.
#define WEBP_NEED_LOG_TABLE_8BIT
extern const uint8_t WebPLogTable8bit[256];
static WEBP_INLINE int WebPLog2FloorC(uint32_t n) {
int log_value = 0;
while (n >= 256) {
log_value += 8;
n >>= 8;
}
return log_value + WebPLogTable8bit[n];
}
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); }
#endif
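
To see what the relocated fallback computes, here is a table-free sketch (the WebPLogTable8bit lookup simply replaces the final bit-scan loop below):

#include <assert.h>
#include <stdint.h>

static int Log2FloorDemo(uint32_t n) {
  int log_value = 0;
  while (n >= 256) {   /* peel off whole bytes, 8 bits at a time */
    log_value += 8;
    n >>= 8;
  }
  while (n > 1) {      /* the table lookup short-circuits this loop */
    ++log_value;
    n >>= 1;
  }
  return log_value;
}

int main(void) {
  assert(Log2FloorDemo(1) == 0);
  assert(Log2FloorDemo(255) == 7);
  assert(Log2FloorDemo(256) == 8);
  assert(Log2FloorDemo(1000) == 9);  /* floor(log2(1000)) = 9 */
  return 0;
}
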
@@ -175,4 +175,4 @@ WEBP_EXTERN int WebPGetColorPalette(const struct WebPPicture* const pic,
} // extern "C"
#endif
#endif /* WEBP_UTILS_UTILS_H_ */
#endif // WEBP_UTILS_UTILS_H_

@@ -42,6 +42,12 @@ WEBP_EXTERN int WebPGetDecoderVersion(void);
// This function will also validate the header, returning true on success,
// false otherwise. '*width' and '*height' are only valid on successful return.
// Pointers 'width' and 'height' can be passed NULL if deemed irrelevant.
// Note: The following chunk sequences (before the raw VP8/VP8L data) are
// considered valid by this function:
// RIFF + VP8(L)
// RIFF + VP8X + (optional chunks) + VP8(L)
// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose.
WEBP_EXTERN int WebPGetInfo(const uint8_t* data, size_t data_size,
int* width, int* height);
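
A minimal usage sketch of WebPGetInfo(), assuming the public header is installed as webp/decode.h; buf and len stand for whatever holds the (possibly truncated) WebP data:

#include <stdint.h>
#include <stdio.h>
#include "webp/decode.h"

/* Probe a WebP byte buffer for its dimensions without decoding it. */
static void PrintCanvasSize(const uint8_t* buf, size_t len) {
  int width = 0, height = 0;
  if (WebPGetInfo(buf, len, &width, &height)) {
    printf("canvas: %d x %d\n", width, height);
  } else {
    printf("header invalid or incomplete\n");
  }
}
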
@@ -425,6 +431,12 @@ WEBP_EXTERN VP8StatusCode WebPGetFeaturesInternal(
// Returns VP8_STATUS_OK when the features are successfully retrieved. Returns
// VP8_STATUS_NOT_ENOUGH_DATA when more data is needed to retrieve the
// features from headers. Returns error in other cases.
// Note: The following chunk sequences (before the raw VP8/VP8L data) are
// considered valid by this function:
// RIFF + VP8(L)
// RIFF + VP8X + (optional chunks) + VP8(L)
// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose.
static WEBP_INLINE VP8StatusCode WebPGetFeatures(
const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features) {
@@ -491,4 +503,4 @@ WEBP_EXTERN VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
} // extern "C"
#endif
#endif /* WEBP_WEBP_DECODE_H_ */
#endif // WEBP_WEBP_DECODE_H_

Some files were not shown because too many files have changed in this diff.