From b781ac73464c28c18fd05a937bfd7747ba491387 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 1 Jun 2018 10:54:12 +0300 Subject: [PATCH 01/33] Make Intel's Inference Engine backend the default if no preferable backend is specified. --- modules/dnn/include/opencv2/dnn/dnn.hpp | 19 ++- modules/dnn/perf/perf_net.cpp | 12 +- modules/dnn/src/dnn.cpp | 54 ++++++--- modules/dnn/src/layers/batch_norm_layer.cpp | 2 +- modules/dnn/src/layers/blank_layer.cpp | 2 +- modules/dnn/src/layers/concat_layer.cpp | 2 +- modules/dnn/src/layers/convolution_layer.cpp | 35 +++++- .../dnn/src/layers/detection_output_layer.cpp | 2 +- modules/dnn/src/layers/elementwise_layers.cpp | 7 +- modules/dnn/src/layers/eltwise_layer.cpp | 2 +- modules/dnn/src/layers/flatten_layer.cpp | 2 +- .../dnn/src/layers/fully_connected_layer.cpp | 2 +- modules/dnn/src/layers/lrn_layer.cpp | 2 +- .../dnn/src/layers/max_unpooling_layer.cpp | 2 +- .../dnn/src/layers/normalize_bbox_layer.cpp | 2 +- modules/dnn/src/layers/padding_layer.cpp | 2 +- modules/dnn/src/layers/permute_layer.cpp | 2 +- modules/dnn/src/layers/pooling_layer.cpp | 2 +- modules/dnn/src/layers/prior_box_layer.cpp | 2 +- modules/dnn/src/layers/reorg_layer.cpp | 5 - modules/dnn/src/layers/reshape_layer.cpp | 2 +- .../layers/resize_nearest_neighbor_layer.cpp | 2 +- modules/dnn/src/layers/scale_layer.cpp | 2 +- modules/dnn/src/layers/softmax_layer.cpp | 2 +- modules/dnn/test/test_backends.cpp | 22 ++-- modules/dnn/test/test_caffe_importer.cpp | 15 ++- modules/dnn/test/test_darknet_importer.cpp | 9 +- modules/dnn/test/test_googlenet.cpp | 110 ++++-------------- modules/dnn/test/test_halide_layers.cpp | 6 + modules/dnn/test/test_layers.cpp | 16 ++- modules/dnn/test/test_misc.cpp | 1 + modules/dnn/test/test_precomp.hpp | 2 +- modules/dnn/test/test_tf_importer.cpp | 10 +- modules/dnn/test/test_torch_importer.cpp | 5 +- samples/dnn/classification.cpp | 11 +- samples/dnn/classification.py | 13 ++- samples/dnn/colorization.cpp | 2 +- samples/dnn/colorization.py | 2 +- samples/dnn/fast_neural_style.py | 1 + samples/dnn/mobilenet_ssd_accuracy.py | 1 + samples/dnn/object_detection.cpp | 14 ++- samples/dnn/object_detection.py | 13 ++- samples/dnn/openpose.py | 5 - samples/dnn/segmentation.cpp | 11 +- samples/dnn/segmentation.py | 13 ++- 45 files changed, 252 insertions(+), 198 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 3a1108663c..2d4e0e16ee 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -66,16 +66,22 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN /** * @brief Enum of computation backends supported by layers. + * @see Net::setPreferableBackend */ enum Backend { + //! DNN_BACKEND_DEFAULT equals to DNN_BACKEND_INFERENCE_ENGINE if + //! OpenCV is built with Intel's Inference Engine library or + //! DNN_BACKEND_OPENCV otherwise. DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, - DNN_BACKEND_INFERENCE_ENGINE + DNN_BACKEND_INFERENCE_ENGINE, + DNN_BACKEND_OPENCV }; /** * @brief Enum of target devices for computations. + * @see Net::setPreferableTarget */ enum Target { @@ -460,6 +466,9 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @brief Ask network to use specific computation backend where it supported. * @param[in] backendId backend identifier. * @see Backend + * + * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT + * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it equals to DNN_BACKEND_OPENCV.
*/ CV_WRAP void setPreferableBackend(int backendId); @@ -467,6 +476,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @brief Ask network to make computations on specific target device. * @param[in] targetId target identifier. * @see Target + * + * List of supported combinations backend / target: + * | | DNN_BACKEND_OPENCV | DNN_BACKEND_INFERENCE_ENGINE | DNN_BACKEND_HALIDE | + * |------------------------|--------------------|------------------------------|--------------------| + * | DNN_TARGET_CPU | + | + | + | + * | DNN_TARGET_OPENCL | + | + | + | + * | DNN_TARGET_OPENCL_FP16 | + | + | | + * | DNN_TARGET_MYRIAD | | + | | */ CV_WRAP void setPreferableTarget(int targetId); diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index aa4ac05881..206b2c28de 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -12,7 +12,7 @@ namespace opencv_test { -CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE) +CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV) CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD) class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple > @@ -54,7 +54,7 @@ public: void processNet(std::string weights, std::string proto, std::string halide_scheduler, const Mat& input, const std::string& outputLayer = "") { - if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL) + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL) { #if defined(HAVE_OPENCL) if (!cv::ocl::useOpenCL()) @@ -149,7 +149,7 @@ PERF_TEST_P_(DNNTestNetwork, Inception_5h) PERF_TEST_P_(DNNTestNetwork, ENet) { if ((backend == DNN_BACKEND_INFERENCE_ENGINE) || - (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL_FP16)) + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) throw SkipTestException(""); processNet("dnn/Enet-model-best.net", "", "enet.yml", Mat(cv::Size(512, 256), CV_32FC3)); @@ -267,9 +267,9 @@ const tuple testCases[] = { tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16), tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD), #endif - tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_CPU), - tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL), - tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL_FP16) + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU), + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL), + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16) }; INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases)); diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index a5656821c6..4d0cc654dc 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -225,7 +225,7 @@ void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) class OpenCLBackendWrapper : public BackendWrapper { public: - OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL) + OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) { m.copyTo(umat); host = &m; @@ -233,7 +233,7 @@ public: } OpenCLBackendWrapper(const Ptr& baseBuffer, Mat& m) - : BackendWrapper(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL) + : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) { Ptr base = baseBuffer.dynamicCast(); CV_Assert(!base.empty()); @@ -654,7 +654,7 @@ private: static Ptr wrapMat(int backendId, int targetId, cv::Mat& m) { - if (backendId == DNN_BACKEND_DEFAULT) + if (backendId == DNN_BACKEND_OPENCV) { if (targetId == 
DNN_TARGET_CPU) return Ptr(); @@ -727,7 +727,7 @@ struct Net::Impl Ptr wrap(Mat& host) { - if (preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_CPU) + if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU) return Ptr(); MatShape shape(host.dims); @@ -738,7 +738,7 @@ struct Net::Impl if (backendWrappers.find(data) != backendWrappers.end()) { Ptr baseBuffer = backendWrappers[data]; - if (preferableBackend == DNN_BACKEND_DEFAULT) + if (preferableBackend == DNN_BACKEND_OPENCV) { CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget)); return OpenCLBackendWrapper::create(baseBuffer, host); @@ -850,9 +850,27 @@ struct Net::Impl { CV_TRACE_FUNCTION(); + if (preferableBackend == DNN_BACKEND_DEFAULT) +#ifdef HAVE_INF_ENGINE + preferableBackend = DNN_BACKEND_INFERENCE_ENGINE; +#else + preferableBackend = DNN_BACKEND_OPENCV; +#endif + CV_Assert(preferableBackend != DNN_BACKEND_OPENCV || + preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16); + CV_Assert(preferableBackend != DNN_BACKEND_HALIDE || + preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL); + CV_Assert(preferableBackend != DNN_BACKEND_INFERENCE_ENGINE || + preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16 || + preferableTarget == DNN_TARGET_MYRIAD); if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) { - if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget)) + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) #ifndef HAVE_OPENCL { CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU."); @@ -1036,7 +1054,7 @@ struct Net::Impl void initBackend() { CV_TRACE_FUNCTION(); - if (preferableBackend == DNN_BACKEND_DEFAULT) + if (preferableBackend == DNN_BACKEND_OPENCV) CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget)); else if (preferableBackend == DNN_BACKEND_HALIDE) initHalideBackend(); @@ -1375,7 +1393,7 @@ struct Net::Impl std::vector pinsForInternalBlobs; blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, - preferableBackend == DNN_BACKEND_DEFAULT && + preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16); ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); for (int i = 0; i < ld.outputBlobs.size(); ++i) @@ -1418,7 +1436,7 @@ struct Net::Impl void fuseLayers(const std::vector& blobsToKeep_) { - if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT && + if( !fusion || preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_INFERENCE_ENGINE) return; @@ -1446,7 +1464,7 @@ struct Net::Impl // some other layers. // TODO: OpenCL target support more fusion styles. - if ( preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget) && + if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) && (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && ld.layerInstance->type != "MVN")) ) continue; @@ -1481,7 +1499,7 @@ struct Net::Impl break; } - if (preferableBackend != DNN_BACKEND_DEFAULT) + if (preferableBackend != DNN_BACKEND_OPENCV) continue; // Go to the next layer. 
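A minimal usage sketch (not part of the patch) of the behaviour implemented by the setUpNet() changes above: leaving the backend at DNN_BACKEND_DEFAULT lets the network resolve it to DNN_BACKEND_INFERENCE_ENGINE when OpenCV was built with the Inference Engine and to DNN_BACKEND_OPENCV otherwise, while an explicit call pins one implementation. The model and image file names below are placeholders.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

using namespace cv;
using namespace cv::dnn;

int main()
{
    Net net = readNetFromCaffe("model.prototxt", "model.caffemodel"); // placeholder files

    // Doing nothing is equivalent to requesting DNN_BACKEND_DEFAULT: the actual
    // backend is chosen when the network is set up for the first forward() call.

    // Pinning the OpenCV implementation explicitly, e.g. to get results that do
    // not depend on how OpenCV was built:
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU); // see the backend/target table in dnn.hpp

    Mat img = imread("input.png"); // placeholder image
    net.setInput(blobFromImage(img, 1.0, Size(224, 224), Scalar(), false));
    Mat prob = net.forward();
    return 0;
}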
// For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh @@ -1624,7 +1642,7 @@ struct Net::Impl } } - if (preferableBackend != DNN_BACKEND_DEFAULT) + if (preferableBackend != DNN_BACKEND_OPENCV) continue; // Go to the next layer. // the optimization #2. if there is no layer that takes max pooling layer's computed @@ -1735,7 +1753,7 @@ struct Net::Impl { CV_Assert(layers[0].outputBlobs[i].total()); if (layers[0].outputBlobs[i].depth() == CV_32F && - preferableBackend == DNN_BACKEND_DEFAULT && + preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16) { Mat mat = layers[0].outputBlobs[i].clone(); @@ -1781,12 +1799,12 @@ struct Net::Impl TickMeter tm; tm.start(); - if (preferableBackend == DNN_BACKEND_DEFAULT || + if (preferableBackend == DNN_BACKEND_OPENCV || !layer->supportBackend(preferableBackend)) { if( !ld.skip ) { - if (preferableBackend == DNN_BACKEND_DEFAULT && IS_DNN_OPENCL_TARGET(preferableTarget)) + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) { std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers), @@ -2132,7 +2150,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) { std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - if (impl->preferableBackend == DNN_BACKEND_DEFAULT && + if (impl->preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(impl->preferableTarget)) { if (impl->preferableTarget == DNN_TARGET_OPENCL) @@ -2270,7 +2288,7 @@ void Net::setInput(InputArray blob, const String& name) ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); MatShape prevShape = shape(ld.outputBlobs[pin.oid]); Mat blob_; - if (impl->preferableBackend == DNN_BACKEND_DEFAULT && + if (impl->preferableBackend == DNN_BACKEND_OPENCV && impl->preferableTarget == DNN_TARGET_OPENCL_FP16) { Mat blob_mat = blob.getMat(); @@ -2664,7 +2682,7 @@ int Layer::outputNameToIndex(const String&) bool Layer::supportBackend(int backendId) { - return backendId == DNN_BACKEND_DEFAULT; + return backendId == DNN_BACKEND_OPENCV; } Ptr Layer::initHalide(const std::vector > &) diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index c934b1b334..d42face4ec 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -109,7 +109,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 847b6228df..4cf3e96bbd 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -56,7 +56,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index a72b28215b..145dc526fb 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -103,7 +103,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId 
== DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding || // By channels backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !padding; } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 400e03dab5..111bfabf11 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -81,7 +81,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } @@ -1568,6 +1568,39 @@ public: return Ptr(); } + virtual Ptr initInfEngine(const std::vector > &) CV_OVERRIDE + { +#ifdef HAVE_INF_ENGINE + const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout + const int group = numOutput / outGroupCn; + + InferenceEngine::LayerParams lp; + lp.name = name; + lp.type = "Deconvolution"; + lp.precision = InferenceEngine::Precision::FP32; + std::shared_ptr ieLayer(new InferenceEngine::DeconvolutionLayer(lp)); + + ieLayer->_kernel_x = kernel.width; + ieLayer->_kernel_y = kernel.height; + ieLayer->_stride_x = stride.width; + ieLayer->_stride_y = stride.height; + ieLayer->_out_depth = numOutput; + ieLayer->_padding_x = pad.width; + ieLayer->_padding_y = pad.height; + ieLayer->_dilation_x = dilation.width; + ieLayer->_dilation_y = dilation.height; + ieLayer->_group = group; + + ieLayer->_weights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW); + if (hasBias()) + { + ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {(size_t)numOutput}, InferenceEngine::Layout::C); + } + return Ptr(new InfEngineBackendNode(ieLayer)); +#endif // HAVE_INF_ENGINE + return Ptr(); + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index ee1ad95e61..0d1d27d56f 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -195,7 +195,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_locPredTransposed; } diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 32d39970ab..801916d9c4 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -115,7 +115,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } @@ -496,8 +496,9 @@ struct TanHFunctor #ifdef HAVE_INF_ENGINE InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp) { - CV_Error(Error::StsNotImplemented, "TanH"); - return InferenceEngine::CNNLayerPtr(); + lp.type = "TanH"; + std::shared_ptr ieLayer(new InferenceEngine::CNNLayer(lp)); + return ieLayer; } #endif // HAVE_INF_ENGINE diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 39961abb5f..a664b79c56 100644 
--- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -96,7 +96,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index f737ac242b..2fd9242ed3 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -64,7 +64,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 852576903c..5152d60269 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -128,7 +128,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && axis == 1; } diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 1b2a902af0..cfa95e96e8 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -90,7 +90,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp index 09509dfb4f..98cb3595aa 100644 --- a/modules/dnn/src/layers/max_unpooling_layer.cpp +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -34,7 +34,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height; } diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index 41740b3ec7..a846dabfb0 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -63,7 +63,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && pnorm == 2 && !blobs.empty(); } diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index 7c2cb0cbd0..266d887cd8 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -87,7 +87,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4; } diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index d4f756ced5..65d78517e6 
100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -118,7 +118,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 548cb8acdd..eab1dcaa8a 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -135,7 +135,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() && (type == MAX || type == AVE && !pad.width && !pad.height) || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && (type == MAX || type == AVE); diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index 74c0d31f1d..5ccf221f1b 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -270,7 +270,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp index f6102c4ef5..102f29874a 100644 --- a/modules/dnn/src/layers/reorg_layer.cpp +++ b/modules/dnn/src/layers/reorg_layer.cpp @@ -85,11 +85,6 @@ public: return false; } - virtual bool supportBackend(int backendId) CV_OVERRIDE - { - return backendId == DNN_BACKEND_DEFAULT; - } - #ifdef HAVE_OPENCL bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) { diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index 6b2100cdab..65a81c7820 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -168,7 +168,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp b/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp index 448ea25ee4..703b7a438f 100644 --- a/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp +++ b/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp @@ -42,7 +42,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 3c7d63a038..6cfa78c911 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -48,7 +48,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); } diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index c26028e000..135874812b 100644 --- 
a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -88,7 +88,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT || + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1 || backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !logSoftMax; } diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 8dd823e553..88b8a17958 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -40,7 +40,7 @@ public: std::string halideScheduler = "", double l1 = 0.0, double lInf = 0.0) { - if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL) + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL) { #ifdef HAVE_OPENCL if (!cv::ocl::useOpenCL()) @@ -72,11 +72,11 @@ public: // Create two networks - with default backend and target and a tested one. Net netDefault = readNet(weights, proto); - Net net = readNet(weights, proto); - + netDefault.setPreferableBackend(DNN_BACKEND_OPENCV); netDefault.setInput(inp); Mat outDefault = netDefault.forward(outputLayer).clone(); + Net net = readNet(weights, proto); net.setInput(inp); net.setPreferableBackend(backend); net.setPreferableTarget(target); @@ -167,7 +167,7 @@ TEST_P(DNNTestNetwork, Inception_5h) TEST_P(DNNTestNetwork, ENet) { if ((backend == DNN_BACKEND_INFERENCE_ENGINE) || - (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL_FP16)) + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) throw SkipTestException(""); processNet("dnn/Enet-model-best.net", "", Size(512, 512), "l367_Deconvolution", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_enet.yml" : @@ -181,8 +181,8 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) throw SkipTestException(""); Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); - float l1 = (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL_FP16) ? 0.0007 : 0.0; - float lInf = (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL_FP16) ? 0.011 : 0.0; + float l1 = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 0.0007 : 0.0; + float lInf = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 0.011 : 0.0; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", inp, "detection_out", "", l1, lInf); @@ -196,8 +196,8 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow) throw SkipTestException(""); Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); - float l1 = (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL_FP16) ? 0.008 : 0.0; - float lInf = (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL_FP16) ? 0.06 : 0.0; + float l1 = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 0.008 : 0.0; + float lInf = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 
0.06 : 0.0; processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt", inp, "detection_out", "", l1, lInf); } @@ -280,7 +280,7 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) TEST_P(DNNTestNetwork, DenseNet_121) { if ((backend == DNN_BACKEND_HALIDE) || - (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD))) throw SkipTestException(""); @@ -298,8 +298,8 @@ const tuple testCases[] = { tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16), tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD), #endif - tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL), - tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL_FP16) + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL), + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16) }; INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, testing::ValuesIn(testCases)); diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index eaf95acc9d..c99e4e63f7 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -62,6 +62,7 @@ TEST(Test_Caffe, memory_read) ASSERT_TRUE(readFileInMemory(model, dataModel)); Net net = readNetFromCaffe(dataProto.c_str(), dataProto.size()); + net.setPreferableBackend(DNN_BACKEND_OPENCV); ASSERT_FALSE(net.empty()); Net net2 = readNetFromCaffe(dataProto.c_str(), dataProto.size(), @@ -108,6 +109,7 @@ TEST_P(Reproducibility_AlexNet, Accuracy) const float l1 = 1e-5; const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 3e-3 : 1e-4; + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); Mat sample = imread(_tf("grace_hopper_227.png")); @@ -132,6 +134,7 @@ TEST(Reproducibility_FCN, Accuracy) net = readNetFromCaffe(proto, model); ASSERT_FALSE(net.empty()); } + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat sample = imread(_tf("street.png")); ASSERT_TRUE(!sample.empty()); @@ -160,6 +163,7 @@ TEST(Reproducibility_SSD, Accuracy) net = readNetFromCaffe(proto, model); ASSERT_FALSE(net.empty()); } + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat sample = imread(_tf("street.png")); ASSERT_TRUE(!sample.empty()); @@ -185,6 +189,7 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy) const float l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 1.5e-4 : 1e-5; const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-4 : 1e-4; + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); Mat sample = imread(_tf("street.png")); @@ -236,6 +241,7 @@ TEST_P(Reproducibility_ResNet50, Accuracy) findDataFile("dnn/ResNet-50-model.caffemodel", false)); int targetId = GetParam(); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); float l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 
3e-5 : 1e-5; @@ -271,6 +277,7 @@ TEST_P(Reproducibility_SqueezeNet_v1_1, Accuracy) findDataFile("dnn/squeezenet_v1.1.caffemodel", false)); int targetId = GetParam(); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false); @@ -302,6 +309,7 @@ TEST(Reproducibility_AlexNet_fp16, Accuracy) shrinkCaffeModel(model, "bvlc_alexnet.caffemodel_fp16"); Net net = readNetFromCaffe(proto, "bvlc_alexnet.caffemodel_fp16"); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat sample = imread(findDataFile("dnn/grace_hopper_227.png", false)); @@ -321,6 +329,7 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy) shrinkCaffeModel(model, "bvlc_googlenet.caffemodel_fp16"); Net net = readNetFromCaffe(proto, "bvlc_googlenet.caffemodel_fp16"); + net.setPreferableBackend(DNN_BACKEND_OPENCV); std::vector inpMats; inpMats.push_back( imread(_tf("googlenet_0.png")) ); @@ -347,6 +356,7 @@ TEST(Reproducibility_Colorization, Accuracy) const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false); const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false); Net net = readNetFromCaffe(proto, model); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel); net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606)); @@ -367,6 +377,7 @@ TEST(Reproducibility_DenseNet_121, Accuracy) Mat ref = blobFromNPY(_tf("densenet_121_output.npy")); Net net = readNetFromCaffe(proto, model); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setInput(inp); Mat out = net.forward(); @@ -378,6 +389,7 @@ TEST(Test_Caffe, multiple_inputs) { const string proto = findDataFile("dnn/layers/net_input.prototxt", false); Net net = readNetFromCaffe(proto); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat first_image(10, 11, CV_32FC3); Mat second_image(10, 11, CV_32FC3); @@ -412,7 +424,7 @@ TEST_P(opencv_face_detector, Accuracy) Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false)); Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); - net.setPreferableBackend(DNN_BACKEND_DEFAULT); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); net.setInput(blob); @@ -455,6 +467,7 @@ TEST(Test_Caffe, FasterRCNN_and_RFCN) std::string model = findDataFile("dnn/" + models[i], false); Net net = readNetFromCaffe(proto, model); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat img = imread(findDataFile("dnn/dog416.png", false)); resize(img, img, Size(800, 600)); Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false); diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 11d2e50ef8..bc405c4080 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -74,7 +74,7 @@ static void testDarknetModel(const std::string& cfg, const std::string& weights, int backendId, int targetId, float scoreDiff = 0.0, float iouDiff = 0.0, float confThreshold = 0.24) { - if (backendId == DNN_BACKEND_DEFAULT && targetId == DNN_TARGET_OPENCL) + if (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL) { #ifdef HAVE_OPENCL if (!cv::ocl::useOpenCL()) @@ -197,9 +197,9 @@ const tuple testCases[] = { tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16), tuple(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_MYRIAD), #endif - 
tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_CPU), - tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL), - tuple(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL_FP16) + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU), + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL), + tuple(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16) }; INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, testing::ValuesIn(testCases)); @@ -214,6 +214,7 @@ static void testDarknetLayer(const std::string& name, bool hasWeights = false) Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy", false)); Net net = readNet(cfg, model); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setInput(inp); Mat out = net.forward(); normAssert(out, ref); diff --git a/modules/dnn/test/test_googlenet.cpp b/modules/dnn/test/test_googlenet.cpp index 14bf68e635..a2ea731acc 100644 --- a/modules/dnn/test/test_googlenet.cpp +++ b/modules/dnn/test/test_googlenet.cpp @@ -52,36 +52,23 @@ static std::string _tf(TString filename) return (getOpenCVExtraDir() + "/dnn/") + filename; } -TEST(Reproducibility_GoogLeNet, Accuracy) +typedef testing::TestWithParam Reproducibility_GoogLeNet; +TEST_P(Reproducibility_GoogLeNet, Batching) { Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false), findDataFile("dnn/bvlc_googlenet.caffemodel", false)); + int targetId = GetParam(); + net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableTarget(targetId); - std::vector inpMats; - inpMats.push_back( imread(_tf("googlenet_0.png")) ); - inpMats.push_back( imread(_tf("googlenet_1.png")) ); - ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty()); - - net.setInput(blobFromImages(inpMats, 1.0f, Size(), Scalar(), false), "data"); - Mat out = net.forward("prob"); - - Mat ref = blobFromNPY(_tf("googlenet_prob.npy")); - normAssert(out, ref); -} - -OCL_TEST(Reproducibility_GoogLeNet, Accuracy) -{ - Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false), - findDataFile("dnn/bvlc_googlenet.caffemodel", false)); - - net.setPreferableBackend(DNN_BACKEND_DEFAULT); - net.setPreferableTarget(DNN_TARGET_OPENCL); - - // Initialize network for a single image in the batch but test with batch size=2. - Mat inp = Mat(224, 224, CV_8UC3); - randu(inp, -1, 1); - net.setInput(blobFromImage(inp)); - net.forward(); + if (targetId == DNN_TARGET_OPENCL) + { + // Initialize network for a single image in the batch but test with batch size=2. 
+ Mat inp = Mat(224, 224, CV_8UC3); + randu(inp, -1, 1); + net.setInput(blobFromImage(inp)); + net.forward(); + } std::vector inpMats; inpMats.push_back( imread(_tf("googlenet_0.png")) ); @@ -95,10 +82,13 @@ OCL_TEST(Reproducibility_GoogLeNet, Accuracy) normAssert(out, ref); } -TEST(IntermediateBlobs_GoogLeNet, Accuracy) +TEST_P(Reproducibility_GoogLeNet, IntermediateBlobs) { Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false), findDataFile("dnn/bvlc_googlenet.caffemodel", false)); + int targetId = GetParam(); + net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableTarget(targetId); std::vector blobsNames; blobsNames.push_back("conv1/7x7_s2"); @@ -121,39 +111,13 @@ TEST(IntermediateBlobs_GoogLeNet, Accuracy) } } -OCL_TEST(IntermediateBlobs_GoogLeNet, Accuracy) -{ - Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false), - findDataFile("dnn/bvlc_googlenet.caffemodel", false)); - - net.setPreferableBackend(DNN_BACKEND_DEFAULT); - net.setPreferableTarget(DNN_TARGET_OPENCL); - - std::vector blobsNames; - blobsNames.push_back("conv1/7x7_s2"); - blobsNames.push_back("conv1/relu_7x7"); - blobsNames.push_back("inception_4c/1x1"); - blobsNames.push_back("inception_4c/relu_1x1"); - std::vector outs; - Mat in = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(), Scalar(), false); - net.setInput(in, "data"); - net.forward(outs, blobsNames); - CV_Assert(outs.size() == blobsNames.size()); - - for (size_t i = 0; i < blobsNames.size(); i++) - { - std::string filename = blobsNames[i]; - std::replace( filename.begin(), filename.end(), '/', '#'); - Mat ref = blobFromNPY(_tf("googlenet_" + filename + ".npy")); - - normAssert(outs[i], ref, "", 1E-4, 1E-2); - } -} - -TEST(SeveralCalls_GoogLeNet, Accuracy) +TEST_P(Reproducibility_GoogLeNet, SeveralCalls) { Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false), findDataFile("dnn/bvlc_googlenet.caffemodel", false)); + int targetId = GetParam(); + net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableTarget(targetId); std::vector inpMats; inpMats.push_back( imread(_tf("googlenet_0.png")) ); @@ -179,36 +143,6 @@ TEST(SeveralCalls_GoogLeNet, Accuracy) normAssert(outs[0], ref, "", 1E-4, 1E-2); } -OCL_TEST(SeveralCalls_GoogLeNet, Accuracy) -{ - Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt", false), - findDataFile("dnn/bvlc_googlenet.caffemodel", false)); - - net.setPreferableBackend(DNN_BACKEND_DEFAULT); - net.setPreferableTarget(DNN_TARGET_OPENCL); - - std::vector inpMats; - inpMats.push_back( imread(_tf("googlenet_0.png")) ); - inpMats.push_back( imread(_tf("googlenet_1.png")) ); - ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty()); - - net.setInput(blobFromImages(inpMats, 1.0f, Size(), Scalar(), false), "data"); - Mat out = net.forward(); - - Mat ref = blobFromNPY(_tf("googlenet_prob.npy")); - normAssert(out, ref); - - std::vector blobsNames; - blobsNames.push_back("conv1/7x7_s2"); - std::vector outs; - Mat in = blobFromImage(inpMats[0], 1.0f, Size(), Scalar(), false); - net.setInput(in, "data"); - net.forward(outs, blobsNames); - CV_Assert(outs.size() == blobsNames.size()); - - ref = blobFromNPY(_tf("googlenet_conv1#7x7_s2.npy")); - - normAssert(outs[0], ref, "", 1E-4, 1E-2); -} +INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_GoogLeNet, availableDnnTargets()); }} // namespace diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 718f9439e6..2d137c5eda 100644 --- 
a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -26,6 +26,7 @@ static void test(LayerParams& params, Mat& input) net.connect(0, 0, lid, 0); net.setInput(input); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat outputDefault = net.forward(params.name).clone(); net.setPreferableBackend(DNN_BACKEND_HALIDE); @@ -368,6 +369,7 @@ TEST(MaxPoolUnpool_Halide, Accuracy) Mat input({1, 1, 4, 4}, CV_32F); randu(input, -1.0f, 1.0f); net.setInput(input); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat outputDefault = net.forward("testUnpool").clone(); net.setPreferableBackend(DNN_BACKEND_HALIDE); @@ -401,6 +403,7 @@ void testInPlaceActivation(LayerParams& lp) Mat input({1, kNumChannels, 10, 10}, CV_32F); randu(input, -1.0f, 1.0f); net.setInput(input); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat outputDefault = net.forward(lp.name).clone(); net.setInput(input); @@ -579,6 +582,7 @@ TEST_P(Concat, Accuracy) randu(input, -1.0f, 1.0f); net.setInput(input); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat outputDefault = net.forward(concatParam.name).clone(); net.setPreferableBackend(DNN_BACKEND_HALIDE); @@ -655,6 +659,7 @@ TEST_P(Eltwise, Accuracy) randu(input, -1.0f, 1.0f); net.setInput(input); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat outputDefault = net.forward(eltwiseParam.name).clone(); net.setPreferableBackend(DNN_BACKEND_HALIDE); @@ -698,6 +703,7 @@ TEST(MixedBackends_Halide_Default_Halide, Accuracy) Mat input({4, 3, 5, 6}, CV_32F); randu(input, -1.0f, 1.0f); net.setInput(input); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat outputDefault = net.forward().clone(); net.setPreferableBackend(DNN_BACKEND_HALIDE); diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 5cbfba5517..ccba88378c 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -105,7 +105,7 @@ void testLayerUsingCaffeModels(String basename, int targetId = DNN_TARGET_CPU, Net net = readNetFromCaffe(prototxt, (useCaffeModel) ? caffemodel : String()); ASSERT_FALSE(net.empty()); - net.setPreferableBackend(DNN_BACKEND_DEFAULT); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); Mat inp = blobFromNPY(inpfile); @@ -260,6 +260,7 @@ TEST(Layer_Test_Fused_Concat, Accuracy) randu(input, 0.0f, 1.0f); // [0, 1] to make AbsVal an identity transformation. 
net.setInput(input); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); normAssert(slice(out, Range::all(), Range(0, 2), Range::all(), Range::all()), input); @@ -308,7 +309,7 @@ static void test_Reshape_Split_Slice_layers(int targetId) Net net = readNetFromCaffe(_tf("reshape_and_slice_routines.prototxt")); ASSERT_FALSE(net.empty()); - net.setPreferableBackend(DNN_BACKEND_DEFAULT); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); Mat input(6, 12, CV_32F); @@ -335,6 +336,7 @@ TEST(Layer_Conv_Elu, Accuracy) Mat ref = blobFromNPY(_tf("layer_elu_out.npy")); net.setInput(inp, "input"); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); normAssert(ref, out); @@ -502,6 +504,7 @@ void testLayerUsingDarknetModels(String basename, bool useDarknetModel = false, Mat ref = blobFromNPY(outfile); net.setInput(inp, "data"); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); normAssert(ref, out); @@ -527,6 +530,7 @@ TEST(Layer_Test_ROIPooling, Accuracy) net.setInput(inp, "input"); net.setInput(rois, "rois"); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); @@ -547,6 +551,7 @@ TEST_P(Test_Caffe_layers, FasterRCNN_Proposal) net.setInput(imInfo, "im_info"); std::vector outs; + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.forward(outs, "output"); for (int i = 0; i < 2; ++i) @@ -614,6 +619,7 @@ TEST_P(Scale_untrainable, Accuracy) net.setInputsNames(inpNames); net.setInput(input, inpNames[0]); net.setInput(weights, inpNames[1]); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); Mat ref(input.dims, input.size, CV_32F); @@ -681,6 +687,7 @@ TEST_P(Crop, Accuracy) net.setInputsNames(inpNames); net.setInput(inpImage, inpNames[0]); net.setInput(sizImage, inpNames[1]); + net.setPreferableBackend(DNN_BACKEND_OPENCV); // There are a few conditions that represent invalid input to the crop // layer, so in those cases we want to verify an exception is thrown. 
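An illustrative sketch (not part of the patch) of the comparison pattern these test updates rely on: a reference output is produced with the backend pinned to DNN_BACKEND_OPENCV, and the backend/target combination under test must reproduce it within a tolerance. The model file names and the tolerance are placeholders, and a plain L-inf norm stands in for the tests' normAssert helper.

#include <opencv2/dnn.hpp>

static void compareAgainstOpenCVBackend(int backendId, int targetId, const cv::Mat& input)
{
    using namespace cv::dnn;

    // Reference result: plain OpenCV backend on CPU.
    Net ref = readNetFromCaffe("model.prototxt", "model.caffemodel"); // placeholder files
    ref.setPreferableBackend(DNN_BACKEND_OPENCV);
    ref.setInput(input);
    cv::Mat refOut = ref.forward().clone();

    // Result of the backend/target pair under test.
    Net net = readNetFromCaffe("model.prototxt", "model.caffemodel");
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);
    net.setInput(input);
    cv::Mat out = net.forward().clone();

    // The real tests use normAssert with per-network L1/L-inf thresholds.
    CV_Assert(cv::norm(refOut, out, cv::NORM_INF) < 1e-4);
}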
@@ -744,6 +751,7 @@ TEST(Layer_Test_Average_pooling_kernel_area, Accuracy) Mat target = (Mat_(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9); Mat tmp = blobFromImage(inp); net.setInput(blobFromImage(inp)); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); normAssert(out, blobFromImage(target)); } @@ -768,6 +776,7 @@ TEST(Layer_PriorBox, squares) Mat inp(1, 2, CV_32F); randu(inp, -1, 1); net.setInput(blobFromImage(inp)); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); Mat target = (Mat_(4, 4) << 0.0, 0.0, 0.75, 1.0, @@ -789,6 +798,7 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy) Mat inp = blobFromNPY(_tf("blob.npy")); netDefault.setInput(inp); + netDefault.setPreferableBackend(DNN_BACKEND_OPENCV); Mat outDefault = netDefault.forward(); net.setInput(inp); @@ -847,7 +857,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_DEFAULT; + return backendId == DNN_BACKEND_OPENCV; } virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE {} diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index b2fb71bcaf..aff79bf0ec 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -128,6 +128,7 @@ TEST(LayerFactory, custom_layers) net.addLayerToPrev(lp.name, lp.type, lp); net.setInput(inp); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat output = net.forward(); if (i == 0) EXPECT_EQ(output.at(0), 1); diff --git a/modules/dnn/test/test_precomp.hpp b/modules/dnn/test/test_precomp.hpp index 062308bf67..91a5e60961 100644 --- a/modules/dnn/test/test_precomp.hpp +++ b/modules/dnn/test/test_precomp.hpp @@ -52,7 +52,7 @@ namespace opencv_test { using namespace cv::dnn; -CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE) +CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV) CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD) static testing::internal::ParamGenerator availableDnnTargets() diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 84205f72fb..2690d7242b 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -34,6 +34,7 @@ TEST(Test_TensorFlow, read_inception) net = readNetFromTensorflow(model); ASSERT_FALSE(net.empty()); } + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat sample = imread(_tf("grace_hopper_227.png")); ASSERT_TRUE(!sample.empty()); @@ -57,6 +58,7 @@ TEST(Test_TensorFlow, inception_accuracy) net = readNetFromTensorflow(model); ASSERT_FALSE(net.empty()); } + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat sample = imread(_tf("grace_hopper_227.png")); ASSERT_TRUE(!sample.empty()); @@ -104,7 +106,7 @@ static void runTensorFlowNet(const std::string& prefix, int targetId = DNN_TARGE ASSERT_FALSE(net.empty()); - net.setPreferableBackend(DNN_BACKEND_DEFAULT); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); cv::Mat input = blobFromNPY(inpPath); @@ -234,7 +236,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_SSD) } Net net = readNetFromTensorflow(netPath, netConfig); - + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(GetParam()); net.setInput(inp); @@ -256,6 +258,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD) Mat img = imread(findDataFile("dnn/street.png", false)); Mat blob = blobFromImage(img, 1.0f / 
127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(GetParam()); net.setInput(blob); @@ -276,6 +279,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_Faster_RCNN) std::string model = findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", false); Net net = readNetFromTensorflow(model, proto); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat img = imread(findDataFile("dnn/dog416.png", false)); Mat blob = blobFromImage(img, 1.0f / 127.5, Size(800, 600), Scalar(127.5, 127.5, 127.5), true, false); @@ -295,6 +299,7 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false)); Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(GetParam()); net.setInput(blob); @@ -526,6 +531,7 @@ TEST(Test_TensorFlow, EAST_text_detection) std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false); Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false)); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat img = imread(imgPath); Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false); diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 33e0e94801..6d03b3e484 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -77,7 +77,7 @@ static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String out Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary); ASSERT_FALSE(net.empty()); - net.setPreferableBackend(DNN_BACKEND_DEFAULT); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId); Mat inp, outRef; @@ -215,6 +215,7 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy) const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false); Net net = readNetFromTorch(model); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(GetParam()); Mat sample = imread(findDataFile("cv/shared/lena.png", false)); @@ -241,6 +242,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy) ASSERT_TRUE(!net.empty()); } + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(GetParam()); Mat sample = imread(_tf("street.png", false)); @@ -287,6 +289,7 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) const string model = findDataFile(models[i], false); Net net = readNetFromTorch(model); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(GetParam()); Mat img = imread(findDataFile("dnn/googlenet_1.png", false)); diff --git a/samples/dnn/classification.cpp b/samples/dnn/classification.cpp index 21e9520743..7f8aa74b83 100644 --- a/samples/dnn/classification.cpp +++ b/samples/dnn/classification.cpp @@ -21,12 +21,15 @@ const char* keys = "{ height | | Preprocess input image by resizing to a specific height. }" "{ rgb | | Indicate that model works with RGB input images instead BGR ones. 
}" "{ backend | 0 | Choose one of computation backends: " - "0: default C++ backend, " + "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " - "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)}" + "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "3: OpenCV implementation }" "{ target | 0 | Choose one of target computation devices: " - "0: CPU target (by default)," - "1: OpenCL }"; + "0: CPU target (by default), " + "1: OpenCL, " + "2: OpenCL fp16 (half-float precision), " + "3: VPU }"; using namespace cv; using namespace dnn; diff --git a/samples/dnn/classification.py b/samples/dnn/classification.py index 637309fe25..9a610d1ab7 100644 --- a/samples/dnn/classification.py +++ b/samples/dnn/classification.py @@ -3,8 +3,8 @@ import argparse import numpy as np import sys -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) -targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL) +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) +targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) parser = argparse.ArgumentParser(description='Use this script to run classification deep learning networks using OpenCV.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') @@ -32,13 +32,16 @@ parser.add_argument('--rgb', action='store_true', help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " - "%d: default C++ backend, " + "%d: automatically (by default), " "%d: Halide language (http://halide-lang.org/), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)" % backends) + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, help='Choose one of target computation devices: ' '%d: CPU target (by default), ' - '%d: OpenCL' % targets) + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) args = parser.parse_args() # Load names of classes diff --git a/samples/dnn/colorization.cpp b/samples/dnn/colorization.cpp index 9329e11d6a..3f1c66127f 100644 --- a/samples/dnn/colorization.cpp +++ b/samples/dnn/colorization.cpp @@ -107,7 +107,7 @@ int main(int argc, char **argv) // run the L channel through the network Mat inputBlob = blobFromImage(input); net.setInput(inputBlob); - Mat result = net.forward("class8_ab"); + Mat result = net.forward(); // retrieve the calculated a,b channels from the network output Size siz(result.size[2], result.size[3]); diff --git a/samples/dnn/colorization.py b/samples/dnn/colorization.py index d55ac45093..c9eb2af3b6 100644 --- a/samples/dnn/colorization.py +++ b/samples/dnn/colorization.py @@ -56,7 +56,7 @@ if __name__ == '__main__': img_l_rs -= 50 # subtract 50 for mean-centering net.setInput(cv.dnn.blobFromImage(img_l_rs)) - ab_dec = net.forward('class8_ab')[0,:,:,:].transpose((1,2,0)) # this is our result + ab_dec = net.forward()[0,:,:,:].transpose((1,2,0)) # this is our result (H_out,W_out) = ab_dec.shape[:2] 
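An illustrative sketch (not part of the patch) of how the numeric --backend/--target options documented in the C++ samples map onto the network settings: because the Backend and Target enums are laid out in the same order as the help strings (0: default, 1: Halide, 2: Inference Engine, 3: OpenCV; 0: CPU, 1: OpenCL, 2: OpenCL fp16, 3: VPU), the parsed integers can be passed through unchanged. The parser keys and model files below are simplified placeholders.

#include <opencv2/core/utility.hpp>
#include <opencv2/dnn.hpp>

int main(int argc, char** argv)
{
    const char* keys =
        "{ backend | 0 | computation backend }"
        "{ target  | 0 | target device }";
    cv::CommandLineParser parser(argc, argv, keys);

    cv::dnn::Net net = cv::dnn::readNetFromCaffe("model.prototxt", "model.caffemodel"); // placeholder files
    net.setPreferableBackend(parser.get<int>("backend")); // 0..3 as listed in the sample help
    net.setPreferableTarget(parser.get<int>("target"));   // 0..3 as listed in the sample help
    return 0;
}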
ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig)) diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py index 5aac8f3fd4..ab5d67f5fd 100644 --- a/samples/dnn/fast_neural_style.py +++ b/samples/dnn/fast_neural_style.py @@ -14,6 +14,7 @@ parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of args = parser.parse_args() net = cv.dnn.readNetFromTorch(args.model) +net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV); if args.input: cap = cv.VideoCapture(args.input) diff --git a/samples/dnn/mobilenet_ssd_accuracy.py b/samples/dnn/mobilenet_ssd_accuracy.py index 7c937dd1c8..c522c5a3c9 100644 --- a/samples/dnn/mobilenet_ssd_accuracy.py +++ b/samples/dnn/mobilenet_ssd_accuracy.py @@ -27,6 +27,7 @@ args = parser.parse_args() ### Get OpenCV predictions ##################################################### net = cv.dnn.readNetFromTensorflow(args.weights, args.prototxt) +net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV); detections = [] for imgName in os.listdir(args.images): diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp index f2b761b387..084d41bb5f 100644 --- a/samples/dnn/object_detection.cpp +++ b/samples/dnn/object_detection.cpp @@ -23,12 +23,16 @@ const char* keys = "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ thr | .5 | Confidence threshold. }" "{ backend | 0 | Choose one of computation backends: " - "0: default C++ backend, " + "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " - "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)}" - "{ target | 0 | Choose one of target computation devices: " - "0: CPU target (by default)," - "1: OpenCL }"; + "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "3: OpenCV implementation }" + "{ target | 0 | Choose one of target computation devices: " + "0: CPU target (by default), " + "1: OpenCL, " + "2: OpenCL fp16 (half-float precision), " + "3: VPU }"; + using namespace cv; using namespace dnn; diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index a299b558e7..1d2811bacf 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -3,8 +3,8 @@ import argparse import sys import numpy as np -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) -targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL) +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) +targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) parser = argparse.ArgumentParser(description='Use this script to run object detection deep learning networks using OpenCV.') parser.add_argument('--input', help='Path to input image or video file. 
Skip this argument to capture frames from a camera.') @@ -33,13 +33,16 @@ parser.add_argument('--rgb', action='store_true', parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " - "%d: default C++ backend, " + "%d: automatically (by default), " "%d: Halide language (http://halide-lang.org/), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)" % backends) + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, help='Choose one of target computation devices: ' '%d: CPU target (by default), ' - '%d: OpenCL' % targets) + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) args = parser.parse_args() # Load names of classes diff --git a/samples/dnn/openpose.py b/samples/dnn/openpose.py index 4f1b01b8ce..4f367c10ec 100644 --- a/samples/dnn/openpose.py +++ b/samples/dnn/openpose.py @@ -16,9 +16,6 @@ parser.add_argument('--dataset', help='Specify what kind of model was trained. ' parser.add_argument('--thr', default=0.1, type=float, help='Threshold value for pose parts heat map') parser.add_argument('--width', default=368, type=int, help='Resize input to specific width.') parser.add_argument('--height', default=368, type=int, help='Resize input to specific height.') -parser.add_argument('--inf_engine', action='store_true', - help='Enable Intel Inference Engine computational backend. ' - 'Check that plugins folder is in LD_LIBRARY_PATH environment variable') args = parser.parse_args() @@ -49,8 +46,6 @@ inWidth = args.width inHeight = args.height net = cv.dnn.readNetFromCaffe(args.proto, args.model) -if args.inf_engine: - net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) cap = cv.VideoCapture(args.input if args.input else 0) diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp index 920e325b83..ce2147acd6 100644 --- a/samples/dnn/segmentation.cpp +++ b/samples/dnn/segmentation.cpp @@ -24,12 +24,15 @@ const char* keys = "{ height | | Preprocess input image by resizing to a specific height. }" "{ rgb | | Indicate that model works with RGB input images instead BGR ones. 
}" "{ backend | 0 | Choose one of computation backends: " - "0: default C++ backend, " + "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " - "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)}" + "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "3: OpenCV implementation }" "{ target | 0 | Choose one of target computation devices: " - "0: CPU target (by default)," - "1: OpenCL }"; + "0: CPU target (by default), " + "1: OpenCL, " + "2: OpenCL fp16 (half-float precision), " + "3: VPU }"; using namespace cv; using namespace dnn; diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index 3649bbbe22..b615b96028 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -3,8 +3,8 @@ import argparse import numpy as np import sys -backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) -targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL) +backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV) +targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD) parser = argparse.ArgumentParser(description='Use this script to run semantic segmentation deep learning networks using OpenCV.') parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') @@ -34,13 +34,16 @@ parser.add_argument('--rgb', action='store_true', help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " - "%d: default C++ backend, " + "%d: automatically (by default), " "%d: Halide language (http://halide-lang.org/), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)" % backends) + "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "%d: OpenCV implementation" % backends) parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, help='Choose one of target computation devices: ' '%d: CPU target (by default), ' - '%d: OpenCL' % targets) + '%d: OpenCL, ' + '%d: OpenCL fp16 (half-float precision), ' + '%d: VPU' % targets) args = parser.parse_args() np.random.seed(324) From 2628662586471019d708b62c11f40627c3578fe0 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 4 Jun 2018 19:05:45 +0300 Subject: [PATCH 02/33] videoio(ffmpeg): cleanup, remove dead code --- modules/videoio/src/cap_ffmpeg_api.hpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/modules/videoio/src/cap_ffmpeg_api.hpp b/modules/videoio/src/cap_ffmpeg_api.hpp index 96bb8ee47c..216328e535 100644 --- a/modules/videoio/src/cap_ffmpeg_api.hpp +++ b/modules/videoio/src/cap_ffmpeg_api.hpp @@ -32,29 +32,18 @@ typedef struct CvCapture_FFMPEG CvCapture_FFMPEG; typedef struct CvVideoWriter_FFMPEG CvVideoWriter_FFMPEG; OPENCV_FFMPEG_API struct CvCapture_FFMPEG* cvCreateFileCapture_FFMPEG(const char* filename); -OPENCV_FFMPEG_API struct CvCapture_FFMPEG_2* cvCreateFileCapture_FFMPEG_2(const char* filename); OPENCV_FFMPEG_API int cvSetCaptureProperty_FFMPEG(struct CvCapture_FFMPEG* cap, int prop, double value); -OPENCV_FFMPEG_API int cvSetCaptureProperty_FFMPEG_2(struct 
CvCapture_FFMPEG_2* cap, - int prop, double value); OPENCV_FFMPEG_API double cvGetCaptureProperty_FFMPEG(struct CvCapture_FFMPEG* cap, int prop); -OPENCV_FFMPEG_API double cvGetCaptureProperty_FFMPEG_2(struct CvCapture_FFMPEG_2* cap, int prop); OPENCV_FFMPEG_API int cvGrabFrame_FFMPEG(struct CvCapture_FFMPEG* cap); -OPENCV_FFMPEG_API int cvGrabFrame_FFMPEG_2(struct CvCapture_FFMPEG_2* cap); OPENCV_FFMPEG_API int cvRetrieveFrame_FFMPEG(struct CvCapture_FFMPEG* capture, unsigned char** data, int* step, int* width, int* height, int* cn); -OPENCV_FFMPEG_API int cvRetrieveFrame_FFMPEG_2(struct CvCapture_FFMPEG_2* capture, unsigned char** data, - int* step, int* width, int* height, int* cn); OPENCV_FFMPEG_API void cvReleaseCapture_FFMPEG(struct CvCapture_FFMPEG** cap); -OPENCV_FFMPEG_API void cvReleaseCapture_FFMPEG_2(struct CvCapture_FFMPEG_2** cap); + OPENCV_FFMPEG_API struct CvVideoWriter_FFMPEG* cvCreateVideoWriter_FFMPEG(const char* filename, int fourcc, double fps, int width, int height, int isColor ); -OPENCV_FFMPEG_API struct CvVideoWriter_FFMPEG_2* cvCreateVideoWriter_FFMPEG_2(const char* filename, - int fourcc, double fps, int width, int height, int isColor ); - OPENCV_FFMPEG_API int cvWriteFrame_FFMPEG(struct CvVideoWriter_FFMPEG* writer, const unsigned char* data, int step, int width, int height, int cn, int origin); - OPENCV_FFMPEG_API void cvReleaseVideoWriter_FFMPEG(struct CvVideoWriter_FFMPEG** writer); typedef CvCapture_FFMPEG* (*CvCreateFileCapture_Plugin)( const char* filename ); From d041b0a599c504f0237ed755b2fcfc245b8aa0bb Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 4 Jun 2018 19:48:51 +0300 Subject: [PATCH 03/33] videoio: fix capture modes --- modules/videoio/src/videoio_registry.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/videoio/src/videoio_registry.cpp b/modules/videoio/src/videoio_registry.cpp index c994c71a48..1f990b11c0 100644 --- a/modules/videoio/src/videoio_registry.cpp +++ b/modules/videoio/src/videoio_registry.cpp @@ -70,7 +70,7 @@ static const struct VideoBackendInfo builtin_backends[] = // Windows #ifdef WINRT_VIDEO - DECLARE_BACKEND(CAP_WINRT, "WINRT", MODE_CAPTURE_BY_FILENAME), + DECLARE_BACKEND(CAP_WINRT, "WINRT", MODE_CAPTURE_BY_INDEX), #endif #ifdef HAVE_MSMF DECLARE_BACKEND(CAP_MSMF, "MSMF", MODE_CAPTURE_ALL | MODE_WRITER), @@ -79,7 +79,7 @@ static const struct VideoBackendInfo builtin_backends[] = DECLARE_BACKEND(CAP_VFW, "VFW", MODE_CAPTURE_ALL | MODE_WRITER), #endif #ifdef HAVE_DSHOW - DECLARE_BACKEND(CAP_DSHOW, "DSHOW", MODE_CAPTURE_ALL), + DECLARE_BACKEND(CAP_DSHOW, "DSHOW", MODE_CAPTURE_BY_INDEX), #endif // Linux, some Unix @@ -98,7 +98,7 @@ static const struct VideoBackendInfo builtin_backends[] = DECLARE_BACKEND(CAP_OPENNI2, "OPENNI2", MODE_CAPTURE_ALL), #endif #ifdef HAVE_INTELPERC - DECLARE_BACKEND(CAP_INTELPERC, "INTEL_PERC", MODE_CAPTURE_ALL), + DECLARE_BACKEND(CAP_INTELPERC, "INTEL_PERC", MODE_CAPTURE_BY_INDEX), #endif // OpenCV file-based only @@ -107,23 +107,23 @@ static const struct VideoBackendInfo builtin_backends[] = // special interfaces / stereo cameras / other SDKs #if defined(HAVE_DC1394_2) || defined(HAVE_DC1394) || defined(HAVE_CMU1394) - DECLARE_BACKEND(CAP_FIREWIRE, "FIREWIRE", MODE_CAPTURE_ALL), + DECLARE_BACKEND(CAP_FIREWIRE, "FIREWIRE", MODE_CAPTURE_BY_INDEX), #endif // GigE #ifdef HAVE_PVAPI - DECLARE_BACKEND(CAP_PVAPI, "PVAPI", MODE_CAPTURE_ALL), + DECLARE_BACKEND(CAP_PVAPI, "PVAPI", MODE_CAPTURE_BY_INDEX), #endif #ifdef HAVE_XIMEA DECLARE_BACKEND(CAP_XIAPI, 
"XIMEA", MODE_CAPTURE_ALL), #endif #ifdef HAVE_GIGE_API - DECLARE_BACKEND(CAP_GIGANETIX, "GIGANETIX", MODE_CAPTURE_ALL), + DECLARE_BACKEND(CAP_GIGANETIX, "GIGANETIX", MODE_CAPTURE_BY_INDEX), #endif #ifdef HAVE_ARAVIS_API - DECLARE_BACKEND(CAP_ARAVIS, "ARAVIS", MODE_CAPTURE_ALL), + DECLARE_BACKEND(CAP_ARAVIS, "ARAVIS", MODE_CAPTURE_BY_INDEX), #endif #ifdef HAVE_UNICAP - DECLARE_BACKEND(CAP_UNICAP, "UNICAP", MODE_CAPTURE_BY_FILENAME), + DECLARE_BACKEND(CAP_UNICAP, "UNICAP", MODE_CAPTURE_BY_INDEX), #endif #ifdef HAVE_GPHOTO2 From 277e9279ccef4fd601cb0d642668960da645a8c9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 4 Jun 2018 18:38:24 +0000 Subject: [PATCH 04/33] videoio(gstreamer): support dumping of VideoWriter pipeline --- modules/videoio/src/cap_gstreamer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index b18756e580..c3100f56f3 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -1657,6 +1657,8 @@ bool CvVideoWriter_GStreamer::open( const char * filename, int fourcc, } #endif + GST_DEBUG_BIN_TO_DOT_FILE(GST_BIN(pipeline), GST_DEBUG_GRAPH_SHOW_ALL, "write-pipeline"); + stateret = gst_element_set_state(GST_ELEMENT(pipeline), GST_STATE_PLAYING); if(stateret == GST_STATE_CHANGE_FAILURE) { handleMessage(pipeline); From 6816495bee17381b55db2227f9c9be084486fef3 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 5 Jun 2018 12:48:35 +0300 Subject: [PATCH 05/33] dnn(test): reuse test/test_common.hpp, eliminate dead code warning --- modules/dnn/perf/perf_net.cpp | 24 ++---------------------- modules/dnn/test/test_common.hpp | 15 ++++++++------- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index aa4ac05881..1386f47656 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -10,6 +10,8 @@ #include "opencv2/dnn/shape_utils.hpp" +#include "../test/test_common.hpp" + namespace opencv_test { CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE) @@ -29,28 +31,6 @@ public: target = (dnn::Target)(int)get<1>(GetParam()); } - static bool checkMyriadTarget() - { -#ifndef HAVE_INF_ENGINE - return false; -#endif - cv::dnn::Net net; - cv::dnn::LayerParams lp; - net.addLayerToPrev("testLayer", "Identity", lp); - net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); - net.setPreferableTarget(cv::dnn::DNN_TARGET_MYRIAD); - net.setInput(cv::Mat::zeros(1, 1, CV_32FC1)); - try - { - net.forward(); - } - catch(...) 
- { - return false; - } - return true; - } - void processNet(std::string weights, std::string proto, std::string halide_scheduler, const Mat& input, const std::string& outputLayer = "") { diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index 8e8ea74d83..519bf7131c 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -42,12 +42,12 @@ #ifndef __OPENCV_TEST_COMMON_HPP__ #define __OPENCV_TEST_COMMON_HPP__ -inline const std::string &getOpenCVExtraDir() +static inline const std::string &getOpenCVExtraDir() { return cvtest::TS::ptr()->get_data_path(); } -inline void normAssert(cv::InputArray ref, cv::InputArray test, const char *comment = "", +static inline void normAssert(cv::InputArray ref, cv::InputArray test, const char *comment = "", double l1 = 0.00001, double lInf = 0.0001) { double normL1 = cvtest::norm(ref, test, cv::NORM_L1) / ref.getMat().total(); @@ -74,7 +74,7 @@ static std::vector matToBoxes(const cv::Mat& m) return boxes; } -inline void normAssertDetections(const std::vector& refClassIds, +static inline void normAssertDetections(const std::vector& refClassIds, const std::vector& refScores, const std::vector& refBoxes, const std::vector& testClassIds, @@ -128,7 +128,7 @@ inline void normAssertDetections(const std::vector& refClassIds, // For SSD-based object detection networks which produce output of shape 1x1xNx7 // where N is a number of detections and an every detection is represented by // a vector [batchId, classId, confidence, left, top, right, bottom]. -inline void normAssertDetections(cv::Mat ref, cv::Mat out, const char *comment = "", +static inline void normAssertDetections(cv::Mat ref, cv::Mat out, const char *comment = "", double confThreshold = 0.0, double scores_diff = 1e-5, double boxes_iou_diff = 1e-4) { @@ -147,11 +147,11 @@ inline void normAssertDetections(cv::Mat ref, cv::Mat out, const char *comment = testBoxes, comment, confThreshold, scores_diff, boxes_iou_diff); } -inline bool checkMyriadTarget() +static inline bool checkMyriadTarget() { #ifndef HAVE_INF_ENGINE return false; -#endif +#else cv::dnn::Net net; cv::dnn::LayerParams lp; net.addLayerToPrev("testLayer", "Identity", lp); @@ -167,9 +167,10 @@ inline bool checkMyriadTarget() return false; } return true; +#endif } -inline bool readFileInMemory(const std::string& filename, std::string& content) +static inline bool readFileInMemory(const std::string& filename, std::string& content) { std::ios::openmode mode = std::ios::in | std::ios::binary; std::ifstream ifs(filename.c_str(), mode); From 9492f46d77b0f4904e8a5eefc34e671609c9d728 Mon Sep 17 00:00:00 2001 From: Peter Rekdal Sunde Date: Tue, 5 Jun 2018 10:37:40 +0200 Subject: [PATCH 06/33] 3rdparty: update libwebp 0.6.1 => 1.0.0 Commit: https://github.com/webmproject/libwebp/commit/698b8844e38a0c5ca50bb20f866e71291bfc3b36 Tag: https://github.com/webmproject/libwebp/releases/tag/v1.0.0 --- 3rdparty/libwebp/src/dec/frame_dec.c | 12 +- 3rdparty/libwebp/src/dec/vp8_dec.c | 2 +- 3rdparty/libwebp/src/dec/vp8i_dec.h | 6 +- 3rdparty/libwebp/src/dec/vp8l_dec.c | 10 +- 3rdparty/libwebp/src/demux/demux.c | 6 +- 3rdparty/libwebp/src/dsp/alpha_processing.c | 29 +- .../src/dsp/alpha_processing_mips_dsp_r2.c | 46 ++ 3rdparty/libwebp/src/dsp/argb.c | 68 -- 3rdparty/libwebp/src/dsp/argb_mips_dsp_r2.c | 110 ---- 3rdparty/libwebp/src/dsp/argb_sse2.c | 70 -- 3rdparty/libwebp/src/dsp/common_sse2.h | 14 +- 3rdparty/libwebp/src/dsp/common_sse41.h | 132 ++++ 3rdparty/libwebp/src/dsp/cost.c | 9 +- 
3rdparty/libwebp/src/dsp/dec.c | 9 +- 3rdparty/libwebp/src/dsp/dsp.h | 52 +- 3rdparty/libwebp/src/dsp/enc.c | 9 +- 3rdparty/libwebp/src/dsp/filters.c | 9 +- 3rdparty/libwebp/src/dsp/lossless.c | 9 +- 3rdparty/libwebp/src/dsp/lossless.h | 4 - 3rdparty/libwebp/src/dsp/lossless_enc.c | 9 +- 3rdparty/libwebp/src/dsp/lossless_enc_sse2.c | 27 +- 3rdparty/libwebp/src/dsp/lossless_enc_sse41.c | 94 +++ 3rdparty/libwebp/src/dsp/lossless_sse2.c | 19 +- 3rdparty/libwebp/src/dsp/rescaler.c | 7 +- 3rdparty/libwebp/src/dsp/rescaler_sse2.c | 20 +- 3rdparty/libwebp/src/dsp/ssim.c | 9 +- 3rdparty/libwebp/src/dsp/upsampling.c | 30 +- 3rdparty/libwebp/src/dsp/upsampling_msa.c | 6 + 3rdparty/libwebp/src/dsp/upsampling_sse2.c | 32 +- 3rdparty/libwebp/src/dsp/upsampling_sse41.c | 239 +++++++ 3rdparty/libwebp/src/dsp/yuv.c | 29 +- 3rdparty/libwebp/src/dsp/yuv.h | 13 + 3rdparty/libwebp/src/dsp/yuv_sse2.c | 4 +- 3rdparty/libwebp/src/dsp/yuv_sse41.c | 613 ++++++++++++++++++ 3rdparty/libwebp/src/enc/alpha_enc.c | 5 +- 3rdparty/libwebp/src/enc/analysis_enc.c | 6 +- .../libwebp/src/enc/delta_palettization_enc.c | 455 ------------- .../libwebp/src/enc/delta_palettization_enc.h | 25 - 3rdparty/libwebp/src/enc/frame_enc.c | 26 +- 3rdparty/libwebp/src/enc/histogram_enc.c | 9 +- 3rdparty/libwebp/src/enc/histogram_enc.h | 5 +- 3rdparty/libwebp/src/enc/iterator_enc.c | 8 +- 3rdparty/libwebp/src/enc/near_lossless_enc.c | 2 +- 3rdparty/libwebp/src/enc/picture_csp_enc.c | 148 +++-- 3rdparty/libwebp/src/enc/picture_psnr_enc.c | 15 +- 3rdparty/libwebp/src/enc/quant_enc.c | 87 +++ 3rdparty/libwebp/src/enc/vp8i_enc.h | 16 +- 3rdparty/libwebp/src/enc/vp8l_enc.c | 79 +-- 3rdparty/libwebp/src/enc/webp_enc.c | 9 +- 3rdparty/libwebp/src/mux/muxi.h | 6 +- 3rdparty/libwebp/src/utils/endian_inl_utils.h | 7 - 51 files changed, 1572 insertions(+), 1093 deletions(-) delete mode 100644 3rdparty/libwebp/src/dsp/argb.c delete mode 100644 3rdparty/libwebp/src/dsp/argb_mips_dsp_r2.c delete mode 100644 3rdparty/libwebp/src/dsp/argb_sse2.c create mode 100644 3rdparty/libwebp/src/dsp/common_sse41.h create mode 100644 3rdparty/libwebp/src/dsp/upsampling_sse41.c create mode 100644 3rdparty/libwebp/src/dsp/yuv_sse41.c delete mode 100644 3rdparty/libwebp/src/enc/delta_palettization_enc.c delete mode 100644 3rdparty/libwebp/src/enc/delta_palettization_enc.h diff --git a/3rdparty/libwebp/src/dec/frame_dec.c b/3rdparty/libwebp/src/dec/frame_dec.c index 517d0f5850..a9d5430d00 100644 --- a/3rdparty/libwebp/src/dec/frame_dec.c +++ b/3rdparty/libwebp/src/dec/frame_dec.c @@ -400,7 +400,9 @@ static void DitherRow(VP8Decoder* const dec) { #define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB // Finalize and transmit a complete row. Return false in case of user-abort. -static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { +static int FinishRow(void* arg1, void* arg2) { + VP8Decoder* const dec = (VP8Decoder*)arg1; + VP8Io* const io = (VP8Io*)arg2; int ok = 1; const VP8ThreadContext* const ctx = &dec->thread_ctx_; const int cache_id = ctx->id_; @@ -448,10 +450,9 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { if (y_end > io->crop_bottom) { y_end = io->crop_bottom; // make sure we don't overflow on last row. } + // If dec->alpha_data_ is not NULL, we have some alpha plane present. io->a = NULL; if (dec->alpha_data_ != NULL && y_start < y_end) { - // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a - // good idea. 
io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start); if (io->a == NULL) { return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, @@ -558,7 +559,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { if (io->bypass_filtering) { dec->filter_type_ = 0; } - // TODO(skal): filter type / strength / sharpness forcing // Define the area where we can skip in-loop filtering, in case of cropping. // @@ -569,8 +569,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { // Means: there's a dependency chain that goes all the way up to the // top-left corner of the picture (MB #0). We must filter all the previous // macroblocks. - // TODO(skal): add an 'approximate_decoding' option, that won't produce - // a 1:1 bit-exactness for complex filtering? { const int extra_pixels = kFilterExtraRows[dec->filter_type_]; if (dec->filter_type_ == 2) { @@ -651,7 +649,7 @@ static int InitThreadContext(VP8Decoder* const dec) { } worker->data1 = dec; worker->data2 = (void*)&dec->thread_ctx_.io_; - worker->hook = (WebPWorkerHook)FinishRow; + worker->hook = FinishRow; dec->num_caches_ = (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1; } else { diff --git a/3rdparty/libwebp/src/dec/vp8_dec.c b/3rdparty/libwebp/src/dec/vp8_dec.c index 6212efd179..c904b529f6 100644 --- a/3rdparty/libwebp/src/dec/vp8_dec.c +++ b/3rdparty/libwebp/src/dec/vp8_dec.c @@ -491,7 +491,7 @@ static int GetCoeffsAlt(VP8BitReader* const br, return 16; } -WEBP_TSAN_IGNORE_FUNCTION static void InitGetCoeffs(void) { +static WEBP_TSAN_IGNORE_FUNCTION void InitGetCoeffs(void) { if (GetCoeffs == NULL) { if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { GetCoeffs = GetCoeffsAlt; diff --git a/3rdparty/libwebp/src/dec/vp8i_dec.h b/3rdparty/libwebp/src/dec/vp8i_dec.h index 28244d9d7a..c929933e1c 100644 --- a/3rdparty/libwebp/src/dec/vp8i_dec.h +++ b/3rdparty/libwebp/src/dec/vp8i_dec.h @@ -30,9 +30,9 @@ extern "C" { // Various defines and enums // version numbers -#define DEC_MAJ_VERSION 0 -#define DEC_MIN_VERSION 6 -#define DEC_REV_VERSION 1 +#define DEC_MAJ_VERSION 1 +#define DEC_MIN_VERSION 0 +#define DEC_REV_VERSION 0 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). 
// Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/3rdparty/libwebp/src/dec/vp8l_dec.c b/3rdparty/libwebp/src/dec/vp8l_dec.c index 42ea3b5e4c..0570f53a77 100644 --- a/3rdparty/libwebp/src/dec/vp8l_dec.c +++ b/3rdparty/libwebp/src/dec/vp8l_dec.c @@ -1643,17 +1643,17 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { #if !defined(WEBP_REDUCE_SIZE) if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; - - if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) { - // need the alpha-multiply functions for premultiplied output or rescaling - WebPInitAlphaProcessing(); - } #else if (io->use_scaling) { dec->status_ = VP8_STATUS_INVALID_PARAM; goto Err; } #endif + if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) { + // need the alpha-multiply functions for premultiplied output or rescaling + WebPInitAlphaProcessing(); + } + if (!WebPIsRGBMode(dec->output_->colorspace)) { WebPInitConvertARGBToYUV(); if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing(); diff --git a/3rdparty/libwebp/src/demux/demux.c b/3rdparty/libwebp/src/demux/demux.c index 79c24a5a7f..684215e3de 100644 --- a/3rdparty/libwebp/src/demux/demux.c +++ b/3rdparty/libwebp/src/demux/demux.c @@ -23,9 +23,9 @@ #include "src/webp/demux.h" #include "src/webp/format_constants.h" -#define DMUX_MAJ_VERSION 0 -#define DMUX_MIN_VERSION 3 -#define DMUX_REV_VERSION 3 +#define DMUX_MAJ_VERSION 1 +#define DMUX_MIN_VERSION 0 +#define DMUX_REV_VERSION 0 typedef struct { size_t start_; // start location of the data diff --git a/3rdparty/libwebp/src/dsp/alpha_processing.c b/3rdparty/libwebp/src/dsp/alpha_processing.c index 590e3bc312..819d1391f2 100644 --- a/3rdparty/libwebp/src/dsp/alpha_processing.c +++ b/3rdparty/libwebp/src/dsp/alpha_processing.c @@ -366,6 +366,16 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); } +#ifdef WORDS_BIGENDIAN +static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, + const uint8_t* b, int len, uint32_t* out) { + int i; + for (i = 0; i < len; ++i) { + out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); + } +} +#endif + static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out) { int i, offset = 0; @@ -381,6 +391,10 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); +#ifdef WORDS_BIGENDIAN +void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g, + const uint8_t* b, int, uint32_t*); +#endif void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out); @@ -395,16 +409,14 @@ extern void WebPInitAlphaProcessingSSE2(void); extern void WebPInitAlphaProcessingSSE41(void); extern void WebPInitAlphaProcessingNEON(void); -static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used = - (VP8CPUInfo)&alpha_processing_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { - if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { WebPMultARGBRow = WebPMultARGBRow_C; WebPMultRow = WebPMultRow_C; WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C; +#ifdef WORDS_BIGENDIAN + 
WebPPackARGB = PackARGB_C; +#endif WebPPackRGB = PackRGB_C; #if !WEBP_NEON_OMIT_C_CODE WebPApplyAlphaMultiply = ApplyAlphaMultiply_C; @@ -451,9 +463,10 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { assert(WebPDispatchAlphaToGreen != NULL); assert(WebPExtractAlpha != NULL); assert(WebPExtractGreen != NULL); +#ifdef WORDS_BIGENDIAN + assert(WebPPackARGB != NULL); +#endif assert(WebPPackRGB != NULL); assert(WebPHasAlpha8b != NULL); assert(WebPHasAlpha32b != NULL); - - alpha_processing_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c b/3rdparty/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c index e0dc91bab9..0090e87cd1 100644 --- a/3rdparty/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c +++ b/3rdparty/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c @@ -125,6 +125,49 @@ static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width, } } +#ifdef WORDS_BIGENDIAN +static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r, + const uint8_t* g, const uint8_t* b, int len, + uint32_t* out) { + int temp0, temp1, temp2, temp3, offset; + const int rest = len & 1; + const uint32_t* const loop_end = out + len - rest; + const int step = 4; + __asm__ volatile ( + "xor %[offset], %[offset], %[offset] \n\t" + "beq %[loop_end], %[out], 0f \n\t" + "2: \n\t" + "lbux %[temp0], %[offset](%[a]) \n\t" + "lbux %[temp1], %[offset](%[r]) \n\t" + "lbux %[temp2], %[offset](%[g]) \n\t" + "lbux %[temp3], %[offset](%[b]) \n\t" + "ins %[temp1], %[temp0], 16, 16 \n\t" + "ins %[temp3], %[temp2], 16, 16 \n\t" + "addiu %[out], %[out], 4 \n\t" + "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" + "sw %[temp0], -4(%[out]) \n\t" + "addu %[offset], %[offset], %[step] \n\t" + "bne %[loop_end], %[out], 2b \n\t" + "0: \n\t" + "beq %[rest], $zero, 1f \n\t" + "lbux %[temp0], %[offset](%[a]) \n\t" + "lbux %[temp1], %[offset](%[r]) \n\t" + "lbux %[temp2], %[offset](%[g]) \n\t" + "lbux %[temp3], %[offset](%[b]) \n\t" + "ins %[temp1], %[temp0], 16, 16 \n\t" + "ins %[temp3], %[temp2], 16, 16 \n\t" + "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" + "sw %[temp0], 0(%[out]) \n\t" + "1: \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), + [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out) + : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step), + [loop_end]"r"(loop_end), [rest]"r"(rest) + : "memory" + ); +} +#endif // WORDS_BIGENDIAN + static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out) { @@ -172,6 +215,9 @@ extern void WebPInitAlphaProcessingMIPSdspR2(void); WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) { WebPDispatchAlpha = DispatchAlpha_MIPSdspR2; WebPMultARGBRow = MultARGBRow_MIPSdspR2; +#ifdef WORDS_BIGENDIAN + WebPPackARGB = PackARGB_MIPSdspR2; +#endif WebPPackRGB = PackRGB_MIPSdspR2; } diff --git a/3rdparty/libwebp/src/dsp/argb.c b/3rdparty/libwebp/src/dsp/argb.c deleted file mode 100644 index cc1f9a96c3..0000000000 --- a/3rdparty/libwebp/src/dsp/argb.c +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2014 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
-// ----------------------------------------------------------------------------- -// -// ARGB making functions. -// -// Author: Djordje Pesut (djordje.pesut@imgtec.com) - -#include "./dsp.h" - -static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { - return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); -} - -static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, - const uint8_t* b, int len, uint32_t* out) { - int i; - for (i = 0; i < len; ++i) { - out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); - } -} - -static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out) { - int i, offset = 0; - for (i = 0; i < len; ++i) { - out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]); - offset += step; - } -} - -void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*, - const uint8_t*, int, uint32_t*); -void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*, - int, int, uint32_t*); - -extern void VP8EncDspARGBInitMIPSdspR2(void); -extern void VP8EncDspARGBInitSSE2(void); - -static volatile VP8CPUInfo argb_last_cpuinfo_used = - (VP8CPUInfo)&argb_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) { - if (argb_last_cpuinfo_used == VP8GetCPUInfo) return; - - VP8PackARGB = PackARGB; - VP8PackRGB = PackRGB; - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo != NULL) { -#if defined(WEBP_USE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8EncDspARGBInitSSE2(); - } -#endif -#if defined(WEBP_USE_MIPS_DSP_R2) - if (VP8GetCPUInfo(kMIPSdspR2)) { - VP8EncDspARGBInitMIPSdspR2(); - } -#endif - } - argb_last_cpuinfo_used = VP8GetCPUInfo; -} diff --git a/3rdparty/libwebp/src/dsp/argb_mips_dsp_r2.c b/3rdparty/libwebp/src/dsp/argb_mips_dsp_r2.c deleted file mode 100644 index af65acb8ff..0000000000 --- a/3rdparty/libwebp/src/dsp/argb_mips_dsp_r2.c +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2014 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// ARGB making functions (mips version). 
-// -// Author: Djordje Pesut (djordje.pesut@imgtec.com) - -#include "./dsp.h" - -#if defined(WEBP_USE_MIPS_DSP_R2) - -static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, - const uint8_t* b, int len, uint32_t* out) { - int temp0, temp1, temp2, temp3, offset; - const int rest = len & 1; - const uint32_t* const loop_end = out + len - rest; - const int step = 4; - __asm__ volatile ( - "xor %[offset], %[offset], %[offset] \n\t" - "beq %[loop_end], %[out], 0f \n\t" - "2: \n\t" - "lbux %[temp0], %[offset](%[a]) \n\t" - "lbux %[temp1], %[offset](%[r]) \n\t" - "lbux %[temp2], %[offset](%[g]) \n\t" - "lbux %[temp3], %[offset](%[b]) \n\t" - "ins %[temp1], %[temp0], 16, 16 \n\t" - "ins %[temp3], %[temp2], 16, 16 \n\t" - "addiu %[out], %[out], 4 \n\t" - "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" - "sw %[temp0], -4(%[out]) \n\t" - "addu %[offset], %[offset], %[step] \n\t" - "bne %[loop_end], %[out], 2b \n\t" - "0: \n\t" - "beq %[rest], $zero, 1f \n\t" - "lbux %[temp0], %[offset](%[a]) \n\t" - "lbux %[temp1], %[offset](%[r]) \n\t" - "lbux %[temp2], %[offset](%[g]) \n\t" - "lbux %[temp3], %[offset](%[b]) \n\t" - "ins %[temp1], %[temp0], 16, 16 \n\t" - "ins %[temp3], %[temp2], 16, 16 \n\t" - "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" - "sw %[temp0], 0(%[out]) \n\t" - "1: \n\t" - : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), - [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out) - : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step), - [loop_end]"r"(loop_end), [rest]"r"(rest) - : "memory" - ); -} - -static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b, - int len, int step, uint32_t* out) { - int temp0, temp1, temp2, offset; - const int rest = len & 1; - const int a = 0xff; - const uint32_t* const loop_end = out + len - rest; - __asm__ volatile ( - "xor %[offset], %[offset], %[offset] \n\t" - "beq %[loop_end], %[out], 0f \n\t" - "2: \n\t" - "lbux %[temp0], %[offset](%[r]) \n\t" - "lbux %[temp1], %[offset](%[g]) \n\t" - "lbux %[temp2], %[offset](%[b]) \n\t" - "ins %[temp0], %[a], 16, 16 \n\t" - "ins %[temp2], %[temp1], 16, 16 \n\t" - "addiu %[out], %[out], 4 \n\t" - "precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t" - "sw %[temp0], -4(%[out]) \n\t" - "addu %[offset], %[offset], %[step] \n\t" - "bne %[loop_end], %[out], 2b \n\t" - "0: \n\t" - "beq %[rest], $zero, 1f \n\t" - "lbux %[temp0], %[offset](%[r]) \n\t" - "lbux %[temp1], %[offset](%[g]) \n\t" - "lbux %[temp2], %[offset](%[b]) \n\t" - "ins %[temp0], %[a], 16, 16 \n\t" - "ins %[temp2], %[temp1], 16, 16 \n\t" - "precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t" - "sw %[temp0], 0(%[out]) \n\t" - "1: \n\t" - : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), - [offset]"=&r"(offset), [out]"+&r"(out) - : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step), - [loop_end]"r"(loop_end), [rest]"r"(rest) - : "memory" - ); -} - -//------------------------------------------------------------------------------ -// Entry point - -extern void VP8EncDspARGBInitMIPSdspR2(void); - -WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) { - VP8PackARGB = PackARGB; - VP8PackRGB = PackRGB; -} - -#else // !WEBP_USE_MIPS_DSP_R2 - -WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2) - -#endif // WEBP_USE_MIPS_DSP_R2 diff --git a/3rdparty/libwebp/src/dsp/argb_sse2.c b/3rdparty/libwebp/src/dsp/argb_sse2.c deleted file mode 100644 index 17dd596c0e..0000000000 --- a/3rdparty/libwebp/src/dsp/argb_sse2.c +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2014 Google Inc. 
All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// ARGB making functions (SSE2 version). -// -// Author: Skal (pascal.massimino@gmail.com) - -#include "./dsp.h" - -#if defined(WEBP_USE_SSE2) - -#include -#include -#include - -static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { - return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); -} - -static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, - const uint8_t* b, int len, uint32_t* out) { - if (g == r + 1) { // RGBA input order. Need to swap R and B. - int i = 0; - const int len_max = len & ~3; // max length processed in main loop - const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu); - assert(b == r + 2); - assert(a == r + 3); - for (; i < len_max; i += 4) { - const __m128i A = _mm_loadu_si128((const __m128i*)(r + 4 * i)); - const __m128i B = _mm_and_si128(A, red_blue_mask); // R 0 B 0 - const __m128i C = _mm_andnot_si128(red_blue_mask, A); // 0 G 0 A - const __m128i D = _mm_shufflelo_epi16(B, _MM_SHUFFLE(2, 3, 0, 1)); - const __m128i E = _mm_shufflehi_epi16(D, _MM_SHUFFLE(2, 3, 0, 1)); - const __m128i F = _mm_or_si128(E, C); - _mm_storeu_si128((__m128i*)(out + i), F); - } - for (; i < len; ++i) { - out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); - } - } else { - assert(g == b + 1); - assert(r == b + 2); - assert(a == b + 3); - memcpy(out, b, len * 4); - } -} - -//------------------------------------------------------------------------------ -// Entry point - -extern void VP8EncDspARGBInitSSE2(void); - -WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) { - extern void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*, - const uint8_t*, int, uint32_t*); - - VP8PackARGB = PackARGB; -} - -#else // !WEBP_USE_SSE2 - -WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2) - -#endif // WEBP_USE_SSE2 diff --git a/3rdparty/libwebp/src/dsp/common_sse2.h b/3rdparty/libwebp/src/dsp/common_sse2.h index 995d7cf4ea..e9f1ebff44 100644 --- a/3rdparty/libwebp/src/dsp/common_sse2.h +++ b/3rdparty/libwebp/src/dsp/common_sse2.h @@ -128,9 +128,9 @@ static WEBP_INLINE void VP8Transpose_2_4x4_16b( // Pack the planar buffers // rrrr... rrrr... gggg... gggg... bbbb... bbbb.... // triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ... -static WEBP_INLINE void VP8PlanarTo24b(__m128i* const in0, __m128i* const in1, - __m128i* const in2, __m128i* const in3, - __m128i* const in4, __m128i* const in5) { +static WEBP_INLINE void VP8PlanarTo24b_SSE2( + __m128i* const in0, __m128i* const in1, __m128i* const in2, + __m128i* const in3, __m128i* const in4, __m128i* const in5) { // The input is 6 registers of sixteen 8b but for the sake of explanation, // let's take 6 registers of four 8b values. // To pack, we will keep taking one every two 8b integer and move it @@ -159,10 +159,10 @@ static WEBP_INLINE void VP8PlanarTo24b(__m128i* const in0, __m128i* const in1, // Convert four packed four-channel buffers like argbargbargbargb... into the // split channels aaaaa ... rrrr ... gggg .... bbbbb ...... 
-static WEBP_INLINE void VP8L32bToPlanar(__m128i* const in0, - __m128i* const in1, - __m128i* const in2, - __m128i* const in3) { +static WEBP_INLINE void VP8L32bToPlanar_SSE2(__m128i* const in0, + __m128i* const in1, + __m128i* const in2, + __m128i* const in3) { // Column-wise transpose. const __m128i A0 = _mm_unpacklo_epi8(*in0, *in1); const __m128i A1 = _mm_unpackhi_epi8(*in0, *in1); diff --git a/3rdparty/libwebp/src/dsp/common_sse41.h b/3rdparty/libwebp/src/dsp/common_sse41.h new file mode 100644 index 0000000000..2f173c024a --- /dev/null +++ b/3rdparty/libwebp/src/dsp/common_sse41.h @@ -0,0 +1,132 @@ +// Copyright 2016 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// SSE4 code common to several files. +// +// Author: Vincent Rabaud (vrabaud@google.com) + +#ifndef WEBP_DSP_COMMON_SSE41_H_ +#define WEBP_DSP_COMMON_SSE41_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(WEBP_USE_SSE41) +#include + +//------------------------------------------------------------------------------ +// Channel mixing. +// Shuffles the input buffer as A0 0 0 A1 0 0 A2 ... +#define WEBP_SSE41_SHUFF(OUT, IN0, IN1) \ + OUT##0 = _mm_shuffle_epi8(*IN0, shuff0); \ + OUT##1 = _mm_shuffle_epi8(*IN0, shuff1); \ + OUT##2 = _mm_shuffle_epi8(*IN0, shuff2); \ + OUT##3 = _mm_shuffle_epi8(*IN1, shuff0); \ + OUT##4 = _mm_shuffle_epi8(*IN1, shuff1); \ + OUT##5 = _mm_shuffle_epi8(*IN1, shuff2); + +// Pack the planar buffers +// rrrr... rrrr... gggg... gggg... bbbb... bbbb.... +// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ... +static WEBP_INLINE void VP8PlanarTo24b_SSE41( + __m128i* const in0, __m128i* const in1, __m128i* const in2, + __m128i* const in3, __m128i* const in4, __m128i* const in5) { + __m128i R0, R1, R2, R3, R4, R5; + __m128i G0, G1, G2, G3, G4, G5; + __m128i B0, B1, B2, B3, B4, B5; + + // Process R. + { + const __m128i shuff0 = _mm_set_epi8( + 5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0); + const __m128i shuff1 = _mm_set_epi8( + -1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1); + const __m128i shuff2 = _mm_set_epi8( + -1, -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1); + WEBP_SSE41_SHUFF(R, in0, in1) + } + + // Process G. + { + // Same as before, just shifted to the left by one and including the right + // padding. + const __m128i shuff0 = _mm_set_epi8( + -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1); + const __m128i shuff1 = _mm_set_epi8( + 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5); + const __m128i shuff2 = _mm_set_epi8( + -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1); + WEBP_SSE41_SHUFF(G, in2, in3) + } + + // Process B. + { + const __m128i shuff0 = _mm_set_epi8( + -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1); + const __m128i shuff1 = _mm_set_epi8( + -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1); + const __m128i shuff2 = _mm_set_epi8( + 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1, 10); + WEBP_SSE41_SHUFF(B, in4, in5) + } + + // OR the different channels. 
+ { + const __m128i RG0 = _mm_or_si128(R0, G0); + const __m128i RG1 = _mm_or_si128(R1, G1); + const __m128i RG2 = _mm_or_si128(R2, G2); + const __m128i RG3 = _mm_or_si128(R3, G3); + const __m128i RG4 = _mm_or_si128(R4, G4); + const __m128i RG5 = _mm_or_si128(R5, G5); + *in0 = _mm_or_si128(RG0, B0); + *in1 = _mm_or_si128(RG1, B1); + *in2 = _mm_or_si128(RG2, B2); + *in3 = _mm_or_si128(RG3, B3); + *in4 = _mm_or_si128(RG4, B4); + *in5 = _mm_or_si128(RG5, B5); + } +} + +#undef WEBP_SSE41_SHUFF + +// Convert four packed four-channel buffers like argbargbargbargb... into the +// split channels aaaaa ... rrrr ... gggg .... bbbbb ...... +static WEBP_INLINE void VP8L32bToPlanar_SSE41(__m128i* const in0, + __m128i* const in1, + __m128i* const in2, + __m128i* const in3) { + // aaaarrrrggggbbbb + const __m128i shuff0 = + _mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0); + const __m128i A0 = _mm_shuffle_epi8(*in0, shuff0); + const __m128i A1 = _mm_shuffle_epi8(*in1, shuff0); + const __m128i A2 = _mm_shuffle_epi8(*in2, shuff0); + const __m128i A3 = _mm_shuffle_epi8(*in3, shuff0); + // A0A1R0R1 + // G0G1B0B1 + // A2A3R2R3 + // G0G1B0B1 + const __m128i B0 = _mm_unpacklo_epi32(A0, A1); + const __m128i B1 = _mm_unpackhi_epi32(A0, A1); + const __m128i B2 = _mm_unpacklo_epi32(A2, A3); + const __m128i B3 = _mm_unpackhi_epi32(A2, A3); + *in3 = _mm_unpacklo_epi64(B0, B2); + *in2 = _mm_unpackhi_epi64(B0, B2); + *in1 = _mm_unpacklo_epi64(B1, B3); + *in0 = _mm_unpackhi_epi64(B1, B3); +} + +#endif // WEBP_USE_SSE41 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_DSP_COMMON_SSE41_H_ diff --git a/3rdparty/libwebp/src/dsp/cost.c b/3rdparty/libwebp/src/dsp/cost.c index a732389d58..634ccc2085 100644 --- a/3rdparty/libwebp/src/dsp/cost.c +++ b/3rdparty/libwebp/src/dsp/cost.c @@ -378,12 +378,7 @@ extern void VP8EncDspCostInitMIPS32(void); extern void VP8EncDspCostInitMIPSdspR2(void); extern void VP8EncDspCostInitSSE2(void); -static volatile VP8CPUInfo cost_last_cpuinfo_used = - (VP8CPUInfo)&cost_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) { - if (cost_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) { VP8GetResidualCost = GetResidualCost_C; VP8SetResidualCoeffs = SetResidualCoeffs_C; @@ -405,8 +400,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) { } #endif } - - cost_last_cpuinfo_used = VP8GetCPUInfo; } //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/dsp/dec.c b/3rdparty/libwebp/src/dsp/dec.c index 7e82407567..1119842dd3 100644 --- a/3rdparty/libwebp/src/dsp/dec.c +++ b/3rdparty/libwebp/src/dsp/dec.c @@ -741,12 +741,7 @@ extern void VP8DspInitMIPS32(void); extern void VP8DspInitMIPSdspR2(void); extern void VP8DspInitMSA(void); -static volatile VP8CPUInfo dec_last_cpuinfo_used = - (VP8CPUInfo)&dec_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { - if (dec_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(VP8DspInit) { VP8InitClipTables(); #if !WEBP_NEON_OMIT_C_CODE @@ -889,6 +884,4 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) { assert(VP8PredChroma8[5] != NULL); assert(VP8PredChroma8[6] != NULL); assert(VP8DitherCombine8x8 != NULL); - - dec_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/3rdparty/libwebp/src/dsp/dsp.h b/3rdparty/libwebp/src/dsp/dsp.h index 99eefe092f..4ab77a5130 100644 --- a/3rdparty/libwebp/src/dsp/dsp.h +++ b/3rdparty/libwebp/src/dsp/dsp.h @@ -141,6 +141,42 @@ extern "C" { #endif 
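// Note on the block added below: WEBP_DSP_INIT_FUNC(name) is a convenience
// wrapper that roughly expands to
//   static void name ## _body(void);
//   void name(void) { WEBP_DSP_INIT(name ## _body); }
//   static void name ## _body(void)   // ...followed by the original body
// so each dsp Init() entry point runs its body at most once for a given
// VP8GetCPUInfo (serialized with a pthread mutex when WEBP_USE_THREAD is
// defined and the target is not _WIN32), replacing the per-file
// '*_last_cpuinfo_used' checks that this patch removes.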
#endif +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) +#include // NOLINT + +#define WEBP_DSP_INIT(func) do { \ + static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ + (VP8CPUInfo)&func ## _last_cpuinfo_used; \ + static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \ + if (pthread_mutex_lock(&func ## _lock)) break; \ + if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \ + func ## _last_cpuinfo_used = VP8GetCPUInfo; \ + (void)pthread_mutex_unlock(&func ## _lock); \ +} while (0) +#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) +#define WEBP_DSP_INIT(func) do { \ + static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ + (VP8CPUInfo)&func ## _last_cpuinfo_used; \ + if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \ + func(); \ + func ## _last_cpuinfo_used = VP8GetCPUInfo; \ +} while (0) +#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) + +// Defines an Init + helper function that control multiple initialization of +// function pointers / tables. +/* Usage: + WEBP_DSP_INIT_FUNC(InitFunc) { + ...function body + } +*/ +#define WEBP_DSP_INIT_FUNC(name) \ + static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \ + WEBP_TSAN_IGNORE_FUNCTION void name(void) { \ + WEBP_DSP_INIT(name ## _body); \ + } \ + static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void) + #define WEBP_UBSAN_IGNORE_UNDEF #define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW #if defined(__clang__) && defined(__has_attribute) @@ -166,6 +202,13 @@ extern "C" { #define WEBP_SWAP_16BIT_CSP 0 #endif +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + typedef enum { kSSE2, kSSE3, @@ -189,7 +232,7 @@ WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; // avoiding a compiler warning. #define WEBP_DSP_INIT_STUB(func) \ extern void func(void); \ - WEBP_TSAN_IGNORE_FUNCTION void func(void) {} + void func(void) {} //------------------------------------------------------------------------------ // Encoding @@ -578,6 +621,13 @@ void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, int width, int inverse); void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse); +#ifdef WORDS_BIGENDIAN +// ARGB packing function: a/r/g/b input is rgba or bgra order. +extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, + const uint8_t* g, const uint8_t* b, int len, + uint32_t* out); +#endif + // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order. 
extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out); diff --git a/3rdparty/libwebp/src/dsp/enc.c b/3rdparty/libwebp/src/dsp/enc.c index 1c807f1df7..fa23b40a30 100644 --- a/3rdparty/libwebp/src/dsp/enc.c +++ b/3rdparty/libwebp/src/dsp/enc.c @@ -740,12 +740,7 @@ extern void VP8EncDspInitMIPS32(void); extern void VP8EncDspInitMIPSdspR2(void); extern void VP8EncDspInitMSA(void); -static volatile VP8CPUInfo enc_last_cpuinfo_used = - (VP8CPUInfo)&enc_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { - if (enc_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(VP8EncDspInit) { VP8DspInit(); // common inverse transforms InitTables(); @@ -838,6 +833,4 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) { assert(VP8EncQuantizeBlockWHT != NULL); assert(VP8Copy4x4 != NULL); assert(VP8Copy16x8 != NULL); - - enc_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/3rdparty/libwebp/src/dsp/filters.c b/3rdparty/libwebp/src/dsp/filters.c index ca5f877da7..069a22eaef 100644 --- a/3rdparty/libwebp/src/dsp/filters.c +++ b/3rdparty/libwebp/src/dsp/filters.c @@ -238,12 +238,7 @@ extern void VP8FiltersInitMSA(void); extern void VP8FiltersInitNEON(void); extern void VP8FiltersInitSSE2(void); -static volatile VP8CPUInfo filters_last_cpuinfo_used = - (VP8CPUInfo)&filters_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) { - if (filters_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(VP8FiltersInit) { WebPUnfilters[WEBP_FILTER_NONE] = NULL; #if !WEBP_NEON_OMIT_C_CODE WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C; @@ -289,6 +284,4 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) { assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL); assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL); assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL); - - filters_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/3rdparty/libwebp/src/dsp/lossless.c b/3rdparty/libwebp/src/dsp/lossless.c index 83f553d9ad..f9b3c182d3 100644 --- a/3rdparty/libwebp/src/dsp/lossless.c +++ b/3rdparty/libwebp/src/dsp/lossless.c @@ -577,9 +577,6 @@ extern void VP8LDspInitNEON(void); extern void VP8LDspInitMIPSdspR2(void); extern void VP8LDspInitMSA(void); -static volatile VP8CPUInfo lossless_last_cpuinfo_used = - (VP8CPUInfo)&lossless_last_cpuinfo_used; - #define COPY_PREDICTOR_ARRAY(IN, OUT) do { \ (OUT)[0] = IN##0_C; \ (OUT)[1] = IN##1_C; \ @@ -599,9 +596,7 @@ static volatile VP8CPUInfo lossless_last_cpuinfo_used = (OUT)[15] = IN##0_C; \ } while (0); -WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) { - if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(VP8LDspInit) { COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors) COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd) @@ -658,8 +653,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) { assert(VP8LConvertBGRAToRGB565 != NULL); assert(VP8LMapColor32b != NULL); assert(VP8LMapColor8b != NULL); - - lossless_last_cpuinfo_used = VP8GetCPUInfo; } #undef COPY_PREDICTOR_ARRAY diff --git a/3rdparty/libwebp/src/dsp/lossless.h b/3rdparty/libwebp/src/dsp/lossless.h index a99dbda686..b2bbdfc93c 100644 --- a/3rdparty/libwebp/src/dsp/lossless.h +++ b/3rdparty/libwebp/src/dsp/lossless.h @@ -25,10 +25,6 @@ extern "C" { #endif -#ifdef WEBP_EXPERIMENTAL_FEATURES -#include "src/enc/delta_palettization_enc.h" -#endif // WEBP_EXPERIMENTAL_FEATURES - 
//------------------------------------------------------------------------------ // Decoding diff --git a/3rdparty/libwebp/src/dsp/lossless_enc.c b/3rdparty/libwebp/src/dsp/lossless_enc.c index 92ca3c0542..d608326fef 100644 --- a/3rdparty/libwebp/src/dsp/lossless_enc.c +++ b/3rdparty/libwebp/src/dsp/lossless_enc.c @@ -863,12 +863,7 @@ extern void VP8LEncDspInitMIPS32(void); extern void VP8LEncDspInitMIPSdspR2(void); extern void VP8LEncDspInitMSA(void); -static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used = - (VP8CPUInfo)&lossless_enc_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) { - if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(VP8LEncDspInit) { VP8LDspInit(); #if !WEBP_NEON_OMIT_C_CODE @@ -1011,8 +1006,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) { assert(VP8LPredictorsSub_C[13] != NULL); assert(VP8LPredictorsSub_C[14] != NULL); assert(VP8LPredictorsSub_C[15] != NULL); - - lossless_enc_last_cpuinfo_used = VP8GetCPUInfo; } //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c index 1eaf35ca8e..f84a9909e1 100644 --- a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -46,16 +46,14 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data, //------------------------------------------------------------------------------ // Color Transform +#define MK_CST_16(HI, LO) \ + _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff))) + static void TransformColor_SSE2(const VP8LMultipliers* const m, uint32_t* argb_data, int num_pixels) { - const __m128i mults_rb = _mm_set_epi16( - CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_), - CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_), - CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_), - CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_)); - const __m128i mults_b2 = _mm_set_epi16( - CST_5b(m->red_to_blue_), 0, CST_5b(m->red_to_blue_), 0, - CST_5b(m->red_to_blue_), 0, CST_5b(m->red_to_blue_), 0); + const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_), + CST_5b(m->green_to_blue_)); + const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0); const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks int i; @@ -85,12 +83,8 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, int histo[]) { - const __m128i mults_r = _mm_set_epi16( - CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0, - CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0); - const __m128i mults_g = _mm_set_epi16( - 0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue), - 0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue)); + const __m128i mults_r = MK_CST_16(CST_5b(red_to_blue), 0); + const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_blue)); const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask const __m128i mask_b = _mm_set1_epi32(0x0000ff); // blue mask int y; @@ -135,9 +129,7 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride, static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride, int tile_width, int tile_height, int green_to_red, int histo[]) { - const __m128i mults_g = _mm_set_epi16( - 0, CST_5b(green_to_red), 0, CST_5b(green_to_red), - 0, CST_5b(green_to_red), 0, 
CST_5b(green_to_red)); + const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red)); const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask const __m128i mask = _mm_set1_epi32(0xff); @@ -174,6 +166,7 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride, } } #undef SPAN +#undef MK_CST_16 //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c b/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c index 3526a342d3..2e12a712eb 100644 --- a/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c +++ b/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c @@ -18,6 +18,9 @@ #include #include "src/dsp/lossless.h" +// For sign-extended multiplying constants, pre-shifted by 5: +#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5) + //------------------------------------------------------------------------------ // Subtract-Green Transform @@ -38,6 +41,95 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data, } } +//------------------------------------------------------------------------------ +// Color Transform + +#define SPAN 8 +static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, + int tile_width, int tile_height, + int green_to_blue, int red_to_blue, + int histo[]) { + const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue)); + const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue)); + const __m128i mask_g = _mm_set1_epi16(0xff00); // green mask + const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask + const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask + const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1, + -1, -1, -1, -1, -1, -1, -1); + const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, + 2, -1, 6, -1, 10, -1, 14); + int y; + for (y = 0; y < tile_height; ++y) { + const uint32_t* const src = argb + y * stride; + int i, x; + for (x = 0; x + SPAN <= tile_width; x += SPAN) { + uint16_t values[SPAN]; + const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); + const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); + const __m128i r0 = _mm_shuffle_epi8(in0, shuffler_lo); + const __m128i r1 = _mm_shuffle_epi8(in1, shuffler_hi); + const __m128i r = _mm_or_si128(r0, r1); // r 0 + const __m128i gb0 = _mm_and_si128(in0, mask_gb); + const __m128i gb1 = _mm_and_si128(in1, mask_gb); + const __m128i gb = _mm_packus_epi32(gb0, gb1); // g b + const __m128i g = _mm_and_si128(gb, mask_g); // g 0 + const __m128i A = _mm_mulhi_epi16(r, mults_r); // x dbr + const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dbg + const __m128i C = _mm_sub_epi8(gb, B); // x b' + const __m128i D = _mm_sub_epi8(C, A); // x b'' + const __m128i E = _mm_and_si128(D, mask_b); // 0 b'' + _mm_storeu_si128((__m128i*)values, E); + for (i = 0; i < SPAN; ++i) ++histo[values[i]]; + } + } + { + const int left_over = tile_width & (SPAN - 1); + if (left_over > 0) { + VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride, + left_over, tile_height, + green_to_blue, red_to_blue, histo); + } + } +} + +static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, + int tile_width, int tile_height, + int green_to_red, int histo[]) { + const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_red)); + const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask + const __m128i mask = _mm_set1_epi16(0xff); + + int y; + for (y = 0; y < tile_height; ++y) { + const uint32_t* const src = argb + y 
* stride; + int i, x; + for (x = 0; x + SPAN <= tile_width; x += SPAN) { + uint16_t values[SPAN]; + const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); + const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); + const __m128i g0 = _mm_and_si128(in0, mask_g); // 0 0 | g 0 + const __m128i g1 = _mm_and_si128(in1, mask_g); + const __m128i g = _mm_packus_epi32(g0, g1); // g 0 + const __m128i A0 = _mm_srli_epi32(in0, 16); // 0 0 | x r + const __m128i A1 = _mm_srli_epi32(in1, 16); + const __m128i A = _mm_packus_epi32(A0, A1); // x r + const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dr + const __m128i C = _mm_sub_epi8(A, B); // x r' + const __m128i D = _mm_and_si128(C, mask); // 0 r' + _mm_storeu_si128((__m128i*)values, D); + for (i = 0; i < SPAN; ++i) ++histo[values[i]]; + } + } + { + const int left_over = tile_width & (SPAN - 1); + if (left_over > 0) { + VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride, + left_over, tile_height, green_to_red, + histo); + } + } +} + //------------------------------------------------------------------------------ // Entry point @@ -45,6 +137,8 @@ extern void VP8LEncDspInitSSE41(void); WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) { VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41; + VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41; + VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41; } #else // !WEBP_USE_SSE41 diff --git a/3rdparty/libwebp/src/dsp/lossless_sse2.c b/3rdparty/libwebp/src/dsp/lossless_sse2.c index 653b466cd6..17d7576419 100644 --- a/3rdparty/libwebp/src/dsp/lossless_sse2.c +++ b/3rdparty/libwebp/src/dsp/lossless_sse2.c @@ -453,14 +453,11 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m, int num_pixels, uint32_t* dst) { // sign-extended multiplying constants, pre-shifted by 5. #define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend - const __m128i mults_rb = _mm_set_epi16( - CST(green_to_red_), CST(green_to_blue_), - CST(green_to_red_), CST(green_to_blue_), - CST(green_to_red_), CST(green_to_blue_), - CST(green_to_red_), CST(green_to_blue_)); - const __m128i mults_b2 = _mm_set_epi16( - CST(red_to_blue_), 0, CST(red_to_blue_), 0, - CST(red_to_blue_), 0, CST(red_to_blue_), 0); +#define MK_CST_16(HI, LO) \ + _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff))) + const __m128i mults_rb = MK_CST_16(CST(green_to_red_), CST(green_to_blue_)); + const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0); +#undef MK_CST_16 #undef CST const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks int i; @@ -503,11 +500,11 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels, __m128i in5 = _mm_loadu_si128(in + 5); __m128i in6 = _mm_loadu_si128(in + 6); __m128i in7 = _mm_loadu_si128(in + 7); - VP8L32bToPlanar(&in0, &in1, &in2, &in3); - VP8L32bToPlanar(&in4, &in5, &in6, &in7); + VP8L32bToPlanar_SSE2(&in0, &in1, &in2, &in3); + VP8L32bToPlanar_SSE2(&in4, &in5, &in6, &in7); // At this points, in1/in5 contains red only, in2/in6 green only ... // Pack the colors in 24b RGB. 
- VP8PlanarTo24b(&in1, &in5, &in2, &in6, &in3, &in7); + VP8PlanarTo24b_SSE2(&in1, &in5, &in2, &in6, &in3, &in7); _mm_storeu_si128(out + 0, in1); _mm_storeu_si128(out + 1, in5); _mm_storeu_si128(out + 2, in2); diff --git a/3rdparty/libwebp/src/dsp/rescaler.c b/3rdparty/libwebp/src/dsp/rescaler.c index 4b6b7834e5..f307d35056 100644 --- a/3rdparty/libwebp/src/dsp/rescaler.c +++ b/3rdparty/libwebp/src/dsp/rescaler.c @@ -204,11 +204,7 @@ extern void WebPRescalerDspInitMIPSdspR2(void); extern void WebPRescalerDspInitMSA(void); extern void WebPRescalerDspInitNEON(void); -static volatile VP8CPUInfo rescaler_last_cpuinfo_used = - (VP8CPUInfo)&rescaler_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) { - if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return; +WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { #if !defined(WEBP_REDUCE_SIZE) #if !WEBP_NEON_OMIT_C_CODE WebPRescalerExportRowExpand = WebPRescalerExportRowExpand_C; @@ -253,5 +249,4 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) { assert(WebPRescalerImportRowExpand != NULL); assert(WebPRescalerImportRowShrink != NULL); #endif // WEBP_REDUCE_SIZE - rescaler_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/3rdparty/libwebp/src/dsp/rescaler_sse2.c b/3rdparty/libwebp/src/dsp/rescaler_sse2.c index f93b204fe1..64c50deab5 100644 --- a/3rdparty/libwebp/src/dsp/rescaler_sse2.c +++ b/3rdparty/libwebp/src/dsp/rescaler_sse2.c @@ -36,7 +36,7 @@ static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) { } // input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0 -static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) { +static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) { const __m128i zero = _mm_setzero_si128(); const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH *out = _mm_unpacklo_epi8(A, zero); @@ -50,13 +50,15 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, int accum = x_add; __m128i cur_pixels; + // SSE2 implementation only works with 16b signed arithmetic at max. 
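  // (The guard below routes two cases to the plain C importer,
  //  WebPRescalerImportRowExpand_C: rows narrower than one 8-pixel vector,
  //  and rows where accum = x_add reaches 1 << 15, the first value that no
  //  longer fits in a signed 16-bit lane, which is what "16b signed
  //  arithmetic at max" refers to.)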
+ if (wrk->src_width < 8 || accum >= (1 << 15)) { + WebPRescalerImportRowExpand_C(wrk, src); + return; + } + assert(!WebPRescalerInputDone(wrk)); assert(wrk->x_expand); if (wrk->num_channels == 4) { - if (wrk->src_width < 2) { - WebPRescalerImportRowExpand_C(wrk, src); - return; - } LoadTwoPixels_SSE2(src, &cur_pixels); src += 4; while (1) { @@ -75,11 +77,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, } else { int left; const uint8_t* const src_limit = src + wrk->src_width - 8; - if (wrk->src_width < 8) { - WebPRescalerImportRowExpand_C(wrk, src); - return; - } - LoadHeightPixels_SSE2(src, &cur_pixels); + LoadEightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; while (1) { @@ -94,7 +92,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, if (--left) { cur_pixels = _mm_srli_si128(cur_pixels, 2); } else if (src <= src_limit) { - LoadHeightPixels_SSE2(src, &cur_pixels); + LoadEightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; } else { // tail diff --git a/3rdparty/libwebp/src/dsp/ssim.c b/3rdparty/libwebp/src/dsp/ssim.c index dc1b518a33..989ce8254c 100644 --- a/3rdparty/libwebp/src/dsp/ssim.c +++ b/3rdparty/libwebp/src/dsp/ssim.c @@ -139,12 +139,7 @@ VP8AccumulateSSEFunc VP8AccumulateSSE; extern void VP8SSIMDspInitSSE2(void); -static volatile VP8CPUInfo ssim_last_cpuinfo_used = - (VP8CPUInfo)&ssim_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) { - if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) { #if !defined(WEBP_REDUCE_SIZE) VP8SSIMGetClipped = SSIMGetClipped_C; VP8SSIMGet = SSIMGet_C; @@ -161,6 +156,4 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) { } #endif } - - ssim_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/3rdparty/libwebp/src/dsp/upsampling.c b/3rdparty/libwebp/src/dsp/upsampling.c index e72626a82a..9b60da5bbb 100644 --- a/3rdparty/libwebp/src/dsp/upsampling.c +++ b/3rdparty/libwebp/src/dsp/upsampling.c @@ -217,13 +217,9 @@ WebPYUV444Converter WebPYUV444Converters[MODE_LAST]; extern void WebPInitYUV444ConvertersMIPSdspR2(void); extern void WebPInitYUV444ConvertersSSE2(void); +extern void WebPInitYUV444ConvertersSSE41(void); -static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 = - (VP8CPUInfo)&upsampling_last_cpuinfo_used1; - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) { - if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) { WebPYUV444Converters[MODE_RGBA] = WebPYuv444ToRgba_C; WebPYUV444Converters[MODE_BGRA] = WebPYuv444ToBgra_C; WebPYUV444Converters[MODE_RGB] = WebPYuv444ToRgb_C; @@ -242,29 +238,29 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) { WebPInitYUV444ConvertersSSE2(); } #endif +#if defined(WEBP_USE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + WebPInitYUV444ConvertersSSE41(); + } +#endif #if defined(WEBP_USE_MIPS_DSP_R2) if (VP8GetCPUInfo(kMIPSdspR2)) { WebPInitYUV444ConvertersMIPSdspR2(); } #endif } - upsampling_last_cpuinfo_used1 = VP8GetCPUInfo; } //------------------------------------------------------------------------------ // Main calls extern void WebPInitUpsamplersSSE2(void); +extern void WebPInitUpsamplersSSE41(void); extern void WebPInitUpsamplersNEON(void); extern void WebPInitUpsamplersMIPSdspR2(void); extern void WebPInitUpsamplersMSA(void); -static volatile VP8CPUInfo upsampling_last_cpuinfo_used2 = - (VP8CPUInfo)&upsampling_last_cpuinfo_used2; - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { - if 
(upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) { #ifdef FANCY_UPSAMPLING #if !WEBP_NEON_OMIT_C_CODE WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair_C; @@ -287,6 +283,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { WebPInitUpsamplersSSE2(); } #endif +#if defined(WEBP_USE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + WebPInitUpsamplersSSE41(); + } +#endif #if defined(WEBP_USE_MIPS_DSP_R2) if (VP8GetCPUInfo(kMIPSdspR2)) { WebPInitUpsamplersMIPSdspR2(); @@ -310,6 +311,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { assert(WebPUpsamplers[MODE_BGRA] != NULL); assert(WebPUpsamplers[MODE_rgbA] != NULL); assert(WebPUpsamplers[MODE_bgrA] != NULL); +#if !defined(WEBP_REDUCE_CSP) || !WEBP_NEON_OMIT_C_CODE assert(WebPUpsamplers[MODE_RGB] != NULL); assert(WebPUpsamplers[MODE_BGR] != NULL); assert(WebPUpsamplers[MODE_ARGB] != NULL); @@ -317,9 +319,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) { assert(WebPUpsamplers[MODE_RGB_565] != NULL); assert(WebPUpsamplers[MODE_Argb] != NULL); assert(WebPUpsamplers[MODE_rgbA_4444] != NULL); +#endif #endif // FANCY_UPSAMPLING - upsampling_last_cpuinfo_used2 = VP8GetCPUInfo; } //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/dsp/upsampling_msa.c b/3rdparty/libwebp/src/dsp/upsampling_msa.c index 535ffb772c..99eea70e7d 100644 --- a/3rdparty/libwebp/src/dsp/upsampling_msa.c +++ b/3rdparty/libwebp/src/dsp/upsampling_msa.c @@ -264,6 +264,7 @@ static void YuvToBgr(int y, int u, int v, uint8_t* const bgr) { bgr[2] = Clip8(r1 >> 6); } +#if !defined(WEBP_REDUCE_CSP) static void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) { const int y1 = MultHi(y, 19077); const int r1 = y1 + MultHi(v, 26149) - 14234; @@ -306,6 +307,7 @@ static void YuvToArgb(uint8_t y, uint8_t u, uint8_t v, uint8_t* const argb) { argb[0] = 0xff; YuvToRgb(y, u, v, argb + 1); } +#endif // WEBP_REDUCE_CSP static void YuvToBgra(uint8_t y, uint8_t u, uint8_t v, uint8_t* const bgra) { YuvToBgr(y, u, v, bgra); @@ -317,6 +319,7 @@ static void YuvToRgba(uint8_t y, uint8_t u, uint8_t v, uint8_t* const rgba) { rgba[3] = 0xff; } +#if !defined(WEBP_REDUCE_CSP) static void YuvToRgbLine(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { v16u8 R, G, B; @@ -370,6 +373,7 @@ static void YuvToBgrLine(const uint8_t* y, const uint8_t* u, memcpy(dst, temp, length * 3 * sizeof(*dst)); } } +#endif // WEBP_REDUCE_CSP static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { @@ -427,6 +431,7 @@ static void YuvToBgraLine(const uint8_t* y, const uint8_t* u, } } +#if !defined(WEBP_REDUCE_CSP) static void YuvToArgbLine(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { v16u8 R, G, B; @@ -526,6 +531,7 @@ static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u, memcpy(dst, temp, length * 2 * sizeof(*dst)); } } +#endif // WEBP_REDUCE_CSP #define UPSAMPLE_32PIXELS(a, b, c, d) do { \ v16u8 s = __msa_aver_u_b(a, d); \ diff --git a/3rdparty/libwebp/src/dsp/upsampling_sse2.c b/3rdparty/libwebp/src/dsp/upsampling_sse2.c index fd5d303982..340f1e2ac2 100644 --- a/3rdparty/libwebp/src/dsp/upsampling_sse2.c +++ b/3rdparty/libwebp/src/dsp/upsampling_sse2.c @@ -104,21 +104,6 @@ static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[], Upsample32Pixels_SSE2(r1, r2, out); \ } -#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, \ - top_dst, bottom_dst, cur_x, 
num_pixels) { \ - int n; \ - for (n = 0; n < (num_pixels); ++n) { \ - FUNC((top_y)[(cur_x) + n], r_u[n], r_v[n], \ - (top_dst) + ((cur_x) + n) * (XSTEP)); \ - } \ - if ((bottom_y) != NULL) { \ - for (n = 0; n < (num_pixels); ++n) { \ - FUNC((bottom_y)[(cur_x) + n], r_u[64 + n], r_v[64 + n], \ - (bottom_dst) + ((cur_x) + n) * (XSTEP)); \ - } \ - } \ -} - #define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, \ top_dst, bottom_dst, cur_x) do { \ FUNC##32_SSE2((top_y) + (cur_x), r_u, r_v, (top_dst) + (cur_x) * (XSTEP)); \ @@ -135,7 +120,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ int uv_pos, pos; \ /* 16byte-aligned array to cache reconstructed u and v */ \ - uint8_t uv_buf[4 * 32 + 15]; \ + uint8_t uv_buf[14 * 32 + 15] = { 0 }; \ uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ uint8_t* const r_v = r_u + 32; \ \ @@ -160,11 +145,22 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ } \ if (len > 1) { \ const int left_over = ((len + 1) >> 1) - (pos >> 1); \ + uint8_t* const tmp_top_dst = r_u + 4 * 32; \ + uint8_t* const tmp_bottom_dst = tmp_top_dst + 4 * 32; \ + uint8_t* const tmp_top = tmp_bottom_dst + 4 * 32; \ + uint8_t* const tmp_bottom = (bottom_y == NULL) ? NULL : tmp_top + 32; \ assert(left_over > 0); \ UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u); \ UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v); \ - CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, \ - pos, len - pos); \ + memcpy(tmp_top, top_y + pos, len - pos); \ + if (bottom_y != NULL) memcpy(tmp_bottom, bottom_y + pos, len - pos); \ + CONVERT2RGB_32(FUNC, XSTEP, tmp_top, tmp_bottom, tmp_top_dst, \ + tmp_bottom_dst, 0); \ + memcpy(top_dst + pos * (XSTEP), tmp_top_dst, (len - pos) * (XSTEP)); \ + if (bottom_y != NULL) { \ + memcpy(bottom_dst + pos * (XSTEP), tmp_bottom_dst, \ + (len - pos) * (XSTEP)); \ + } \ } \ } diff --git a/3rdparty/libwebp/src/dsp/upsampling_sse41.c b/3rdparty/libwebp/src/dsp/upsampling_sse41.c new file mode 100644 index 0000000000..648d456027 --- /dev/null +++ b/3rdparty/libwebp/src/dsp/upsampling_sse41.c @@ -0,0 +1,239 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// SSE41 version of YUV to RGB upsampling functions. +// +// Author: somnath@google.com (Somnath Banerjee) + +#include "src/dsp/dsp.h" + +#if defined(WEBP_USE_SSE41) + +#include +#include +#include +#include "src/dsp/yuv.h" + +#ifdef FANCY_UPSAMPLING + +#if !defined(WEBP_REDUCE_CSP) + +// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows +// u = (9*a + 3*b + 3*c + d + 8) / 16 +// = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2 +// = (a + m + 1) / 2 +// where m = (a + 3*b + 3*c + d) / 8 +// = ((a + b + c + d) / 2 + b + c) / 4 +// +// Let's say k = (a + b + c + d) / 4. 
+// We can compute k as +// k = (s + t + 1) / 2 - ((a^d) | (b^c) | (s^t)) & 1 +// where s = (a + d + 1) / 2 and t = (b + c + 1) / 2 +// +// Then m can be written as +// m = (k + t + 1) / 2 - (((b^c) & (s^t)) | (k^t)) & 1 + +// Computes out = (k + in + 1) / 2 - ((ij & (s^t)) | (k^in)) & 1 +#define GET_M(ij, in, out) do { \ + const __m128i tmp0 = _mm_avg_epu8(k, (in)); /* (k + in + 1) / 2 */ \ + const __m128i tmp1 = _mm_and_si128((ij), st); /* (ij) & (s^t) */ \ + const __m128i tmp2 = _mm_xor_si128(k, (in)); /* (k^in) */ \ + const __m128i tmp3 = _mm_or_si128(tmp1, tmp2); /* ((ij) & (s^t)) | (k^in) */\ + const __m128i tmp4 = _mm_and_si128(tmp3, one); /* & 1 -> lsb_correction */ \ + (out) = _mm_sub_epi8(tmp0, tmp4); /* (k + in + 1) / 2 - lsb_correction */ \ +} while (0) + +// pack and store two alternating pixel rows +#define PACK_AND_STORE(a, b, da, db, out) do { \ + const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \ + const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \ + const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b); \ + const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b); \ + _mm_store_si128(((__m128i*)(out)) + 0, t_1); \ + _mm_store_si128(((__m128i*)(out)) + 1, t_2); \ +} while (0) + +// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels. +#define UPSAMPLE_32PIXELS(r1, r2, out) { \ + const __m128i one = _mm_set1_epi8(1); \ + const __m128i a = _mm_loadu_si128((const __m128i*)&(r1)[0]); \ + const __m128i b = _mm_loadu_si128((const __m128i*)&(r1)[1]); \ + const __m128i c = _mm_loadu_si128((const __m128i*)&(r2)[0]); \ + const __m128i d = _mm_loadu_si128((const __m128i*)&(r2)[1]); \ + \ + const __m128i s = _mm_avg_epu8(a, d); /* s = (a + d + 1) / 2 */ \ + const __m128i t = _mm_avg_epu8(b, c); /* t = (b + c + 1) / 2 */ \ + const __m128i st = _mm_xor_si128(s, t); /* st = s^t */ \ + \ + const __m128i ad = _mm_xor_si128(a, d); /* ad = a^d */ \ + const __m128i bc = _mm_xor_si128(b, c); /* bc = b^c */ \ + \ + const __m128i t1 = _mm_or_si128(ad, bc); /* (a^d) | (b^c) */ \ + const __m128i t2 = _mm_or_si128(t1, st); /* (a^d) | (b^c) | (s^t) */ \ + const __m128i t3 = _mm_and_si128(t2, one); /* (a^d) | (b^c) | (s^t) & 1 */ \ + const __m128i t4 = _mm_avg_epu8(s, t); \ + const __m128i k = _mm_sub_epi8(t4, t3); /* k = (a + b + c + d) / 4 */ \ + __m128i diag1, diag2; \ + \ + GET_M(bc, t, diag1); /* diag1 = (a + 3b + 3c + d) / 8 */ \ + GET_M(ad, s, diag2); /* diag2 = (3a + b + c + 3d) / 8 */ \ + \ + /* pack the alternate pixels */ \ + PACK_AND_STORE(a, b, diag1, diag2, (out) + 0); /* store top */ \ + PACK_AND_STORE(c, d, diag2, diag1, (out) + 2 * 32); /* store bottom */ \ +} + +// Turn the macro into a function for reducing code-size when non-critical +static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[], + uint8_t* const out) { + UPSAMPLE_32PIXELS(r1, r2, out); +} + +#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \ + uint8_t r1[17], r2[17]; \ + memcpy(r1, (tb), (num_pixels)); \ + memcpy(r2, (bb), (num_pixels)); \ + /* replicate last byte */ \ + memset(r1 + (num_pixels), r1[(num_pixels) - 1], 17 - (num_pixels)); \ + memset(r2 + (num_pixels), r2[(num_pixels) - 1], 17 - (num_pixels)); \ + /* using the shared function instead of the macro saves ~3k code size */ \ + Upsample32Pixels_SSE41(r1, r2, out); \ +} + +#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, \ + top_dst, bottom_dst, cur_x) do { \ + FUNC##32_SSE41((top_y) + (cur_x), r_u, r_v, (top_dst) + (cur_x) * (XSTEP)); \ + if ((bottom_y) != NULL) { \ + 
FUNC##32_SSE41((bottom_y) + (cur_x), r_u + 64, r_v + 64, \ + (bottom_dst) + (cur_x) * (XSTEP)); \ + } \ +} while (0) + +#define SSE4_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \ +static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ + const uint8_t* top_u, const uint8_t* top_v, \ + const uint8_t* cur_u, const uint8_t* cur_v, \ + uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ + int uv_pos, pos; \ + /* 16byte-aligned array to cache reconstructed u and v */ \ + uint8_t uv_buf[14 * 32 + 15] = { 0 }; \ + uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ + uint8_t* const r_v = r_u + 32; \ + \ + assert(top_y != NULL); \ + { /* Treat the first pixel in regular way */ \ + const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \ + const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \ + const int u0_t = (top_u[0] + u_diag) >> 1; \ + const int v0_t = (top_v[0] + v_diag) >> 1; \ + FUNC(top_y[0], u0_t, v0_t, top_dst); \ + if (bottom_y != NULL) { \ + const int u0_b = (cur_u[0] + u_diag) >> 1; \ + const int v0_b = (cur_v[0] + v_diag) >> 1; \ + FUNC(bottom_y[0], u0_b, v0_b, bottom_dst); \ + } \ + } \ + /* For UPSAMPLE_32PIXELS, 17 u/v values must be read-able for each block */ \ + for (pos = 1, uv_pos = 0; pos + 32 + 1 <= len; pos += 32, uv_pos += 16) { \ + UPSAMPLE_32PIXELS(top_u + uv_pos, cur_u + uv_pos, r_u); \ + UPSAMPLE_32PIXELS(top_v + uv_pos, cur_v + uv_pos, r_v); \ + CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, pos); \ + } \ + if (len > 1) { \ + const int left_over = ((len + 1) >> 1) - (pos >> 1); \ + uint8_t* const tmp_top_dst = r_u + 4 * 32; \ + uint8_t* const tmp_bottom_dst = tmp_top_dst + 4 * 32; \ + uint8_t* const tmp_top = tmp_bottom_dst + 4 * 32; \ + uint8_t* const tmp_bottom = (bottom_y == NULL) ? NULL : tmp_top + 32; \ + assert(left_over > 0); \ + UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u); \ + UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v); \ + memcpy(tmp_top, top_y + pos, len - pos); \ + if (bottom_y != NULL) memcpy(tmp_bottom, bottom_y + pos, len - pos); \ + CONVERT2RGB_32(FUNC, XSTEP, tmp_top, tmp_bottom, tmp_top_dst, \ + tmp_bottom_dst, 0); \ + memcpy(top_dst + pos * (XSTEP), tmp_top_dst, (len - pos) * (XSTEP)); \ + if (bottom_y != NULL) { \ + memcpy(bottom_dst + pos * (XSTEP), tmp_bottom_dst, \ + (len - pos) * (XSTEP)); \ + } \ + } \ +} + +// SSE4 variants of the fancy upsampler. 
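// Each SSE4_UPSAMPLE_FUNC() instantiation below expands into a complete
// line-pair upsampler: UpsampleRgbLinePair_SSE41 converts with VP8YuvToRgb,
// UpsampleBgrLinePair_SSE41 with VP8YuvToBgr, both writing XSTEP = 3 bytes
// per pixel. They are registered for MODE_RGB / MODE_BGR in
// WebPInitUpsamplersSSE41() at the end of this file.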
+SSE4_UPSAMPLE_FUNC(UpsampleRgbLinePair_SSE41, VP8YuvToRgb, 3) +SSE4_UPSAMPLE_FUNC(UpsampleBgrLinePair_SSE41, VP8YuvToBgr, 3) + +#undef GET_M +#undef PACK_AND_STORE +#undef UPSAMPLE_32PIXELS +#undef UPSAMPLE_LAST_BLOCK +#undef CONVERT2RGB +#undef CONVERT2RGB_32 +#undef SSE4_UPSAMPLE_FUNC + +#endif // WEBP_REDUCE_CSP + +//------------------------------------------------------------------------------ +// Entry point + +extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; + +extern void WebPInitUpsamplersSSE41(void); + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE41(void) { +#if !defined(WEBP_REDUCE_CSP) + WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair_SSE41; + WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair_SSE41; +#endif // WEBP_REDUCE_CSP +} + +#endif // FANCY_UPSAMPLING + +//------------------------------------------------------------------------------ + +extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */]; +extern void WebPInitYUV444ConvertersSSE41(void); + +#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \ +extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len); \ +static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len) { \ + int i; \ + const int max_len = len & ~31; \ + for (i = 0; i < max_len; i += 32) { \ + CALL(y + i, u + i, v + i, dst + i * (XSTEP)); \ + } \ + if (i < len) { /* C-fallback */ \ + CALL_C(y + i, u + i, v + i, dst + i * (XSTEP), len - i); \ + } \ +} + +#if !defined(WEBP_REDUCE_CSP) +YUV444_FUNC(Yuv444ToRgb_SSE41, VP8YuvToRgb32_SSE41, WebPYuv444ToRgb_C, 3); +YUV444_FUNC(Yuv444ToBgr_SSE41, VP8YuvToBgr32_SSE41, WebPYuv444ToBgr_C, 3); +#endif // WEBP_REDUCE_CSP + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE41(void) { +#if !defined(WEBP_REDUCE_CSP) + WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb_SSE41; + WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr_SSE41; +#endif // WEBP_REDUCE_CSP +} + +#else + +WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE41) + +#endif // WEBP_USE_SSE41 + +#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE41)) +WEBP_DSP_INIT_STUB(WebPInitUpsamplersSSE41) +#endif diff --git a/3rdparty/libwebp/src/dsp/yuv.c b/3rdparty/libwebp/src/dsp/yuv.c index bddf81fe09..14e67fc28e 100644 --- a/3rdparty/libwebp/src/dsp/yuv.c +++ b/3rdparty/libwebp/src/dsp/yuv.c @@ -71,15 +71,11 @@ void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, WebPSamplerRowFunc WebPSamplers[MODE_LAST]; extern void WebPInitSamplersSSE2(void); +extern void WebPInitSamplersSSE41(void); extern void WebPInitSamplersMIPS32(void); extern void WebPInitSamplersMIPSdspR2(void); -static volatile VP8CPUInfo yuv_last_cpuinfo_used = - (VP8CPUInfo)&yuv_last_cpuinfo_used; - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) { - if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(WebPInitSamplers) { WebPSamplers[MODE_RGB] = YuvToRgbRow; WebPSamplers[MODE_RGBA] = YuvToRgbaRow; WebPSamplers[MODE_BGR] = YuvToBgrRow; @@ -99,6 +95,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) { WebPInitSamplersSSE2(); } #endif // WEBP_USE_SSE2 +#if defined(WEBP_USE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + WebPInitSamplersSSE41(); + } +#endif // WEBP_USE_SSE41 #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { WebPInitSamplersMIPS32(); @@ -110,7 +111,6 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) { } #endif // WEBP_USE_MIPS_DSP_R2 } - yuv_last_cpuinfo_used = VP8GetCPUInfo; } 
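The WEBP_DSP_INIT_FUNC() wrapper used for WebPInitSamplers above (and for the rescaler, SSIM, upsampling and ARGB-to-YUV init functions elsewhere in this patch) centralizes the once-only guard that each of these functions previously open-coded. Its actual definition lives in the dsp headers and is not part of this hunk; the sketch below only restates the pattern being removed, using the identifiers visible in the old WebPInitSamplers code and simplified to two samplers:

// Pre-patch shape of a dsp init function (illustrative sketch only; the
// WEBP_DSP_INIT_FUNC() macro now provides the equivalent of this guard).
static volatile VP8CPUInfo yuv_last_cpuinfo_used =
    (VP8CPUInfo)&yuv_last_cpuinfo_used;            // sentinel: not yet run

WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) {
  if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return;  // already initialized
  WebPSamplers[MODE_RGB] = YuvToRgbRow;            // install C defaults first
  WebPSamplers[MODE_BGR] = YuvToBgrRow;
#if defined(WEBP_USE_SSE2)
  if (VP8GetCPUInfo(kSSE2)) {
    WebPInitSamplersSSE2();                        // then CPU-specific overrides
  }
#endif
  yuv_last_cpuinfo_used = VP8GetCPUInfo;           // mark initialization done
}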
//----------------------------------------------------------------------------- @@ -254,17 +254,13 @@ void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src, void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len, const uint16_t* best_y, uint16_t* out); -static volatile VP8CPUInfo rgba_to_yuv_last_cpuinfo_used = - (VP8CPUInfo)&rgba_to_yuv_last_cpuinfo_used; - extern void WebPInitConvertARGBToYUVSSE2(void); +extern void WebPInitConvertARGBToYUVSSE41(void); extern void WebPInitConvertARGBToYUVNEON(void); extern void WebPInitSharpYUVSSE2(void); extern void WebPInitSharpYUVNEON(void); -WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) { - if (rgba_to_yuv_last_cpuinfo_used == VP8GetCPUInfo) return; - +WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { WebPConvertARGBToY = ConvertARGBToY_C; WebPConvertARGBToUV = WebPConvertARGBToUV_C; @@ -286,6 +282,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) { WebPInitSharpYUVSSE2(); } #endif // WEBP_USE_SSE2 +#if defined(WEBP_USE_SSE41) + if (VP8GetCPUInfo(kSSE4_1)) { + WebPInitConvertARGBToYUVSSE41(); + } +#endif // WEBP_USE_SSE41 } #if defined(WEBP_USE_NEON) @@ -304,6 +305,4 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) { assert(WebPSharpYUVUpdateY != NULL); assert(WebPSharpYUVUpdateRGB != NULL); assert(WebPSharpYUVFilterRow != NULL); - - rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo; } diff --git a/3rdparty/libwebp/src/dsp/yuv.h b/3rdparty/libwebp/src/dsp/yuv.h index c8a55832d4..eb787270d2 100644 --- a/3rdparty/libwebp/src/dsp/yuv.h +++ b/3rdparty/libwebp/src/dsp/yuv.h @@ -166,6 +166,19 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v, #endif // WEBP_USE_SSE2 +//----------------------------------------------------------------------------- +// SSE41 extra functions (mostly for upsampling_sse41.c) + +#if defined(WEBP_USE_SSE41) + +// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst. 
+void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst); +void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst); + +#endif // WEBP_USE_SSE41 + //------------------------------------------------------------------------------ // RGB -> YUV conversion diff --git a/3rdparty/libwebp/src/dsp/yuv_sse2.c b/3rdparty/libwebp/src/dsp/yuv_sse2.c index 6810bf8d15..baa48d5371 100644 --- a/3rdparty/libwebp/src/dsp/yuv_sse2.c +++ b/3rdparty/libwebp/src/dsp/yuv_sse2.c @@ -180,7 +180,7 @@ static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1, // Repeat the same permutations twice more: // r0r4g0g4 | b0b4r1r5 | g1g5b1b5 | r2r6g2g6 | b2b6r3r7 | g3g7b3b7 // r0g0b0r1 | g1b1r2g2 | b2r3g3b3 | r4g4b4r5 | g5b5r6g6 | b6r7g7b7 - VP8PlanarTo24b(in0, in1, in2, in3, in4, in5); + VP8PlanarTo24b_SSE2(in0, in1, in2, in3, in4, in5); _mm_storeu_si128((__m128i*)(rgb + 0), *in0); _mm_storeu_si128((__m128i*)(rgb + 16), *in1); @@ -492,7 +492,7 @@ static WEBP_INLINE void RGB32PackedToPlanar_SSE2(const uint32_t* const argb, __m128i a1 = LOAD_16(argb + 4); __m128i a2 = LOAD_16(argb + 8); __m128i a3 = LOAD_16(argb + 12); - VP8L32bToPlanar(&a0, &a1, &a2, &a3); + VP8L32bToPlanar_SSE2(&a0, &a1, &a2, &a3); rgb[0] = _mm_unpacklo_epi8(a1, zero); rgb[1] = _mm_unpackhi_epi8(a1, zero); rgb[2] = _mm_unpacklo_epi8(a2, zero); diff --git a/3rdparty/libwebp/src/dsp/yuv_sse41.c b/3rdparty/libwebp/src/dsp/yuv_sse41.c new file mode 100644 index 0000000000..579d1f7402 --- /dev/null +++ b/3rdparty/libwebp/src/dsp/yuv_sse41.c @@ -0,0 +1,613 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// YUV->RGB conversion functions +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "src/dsp/yuv.h" + +#if defined(WEBP_USE_SSE41) + +#include "src/dsp/common_sse41.h" +#include +#include + +//----------------------------------------------------------------------------- +// Convert spans of 32 pixels to various RGB formats for the fancy upsampler. + +// These constants are 14b fixed-point version of ITU-R BT.601 constants. 
+// R = (19077 * y + 26149 * v - 14234) >> 6 +// G = (19077 * y - 6419 * u - 13320 * v + 8708) >> 6 +// B = (19077 * y + 33050 * u - 17685) >> 6 +static void ConvertYUV444ToRGB_SSE41(const __m128i* const Y0, + const __m128i* const U0, + const __m128i* const V0, + __m128i* const R, + __m128i* const G, + __m128i* const B) { + const __m128i k19077 = _mm_set1_epi16(19077); + const __m128i k26149 = _mm_set1_epi16(26149); + const __m128i k14234 = _mm_set1_epi16(14234); + // 33050 doesn't fit in a signed short: only use this with unsigned arithmetic + const __m128i k33050 = _mm_set1_epi16((short)33050); + const __m128i k17685 = _mm_set1_epi16(17685); + const __m128i k6419 = _mm_set1_epi16(6419); + const __m128i k13320 = _mm_set1_epi16(13320); + const __m128i k8708 = _mm_set1_epi16(8708); + + const __m128i Y1 = _mm_mulhi_epu16(*Y0, k19077); + + const __m128i R0 = _mm_mulhi_epu16(*V0, k26149); + const __m128i R1 = _mm_sub_epi16(Y1, k14234); + const __m128i R2 = _mm_add_epi16(R1, R0); + + const __m128i G0 = _mm_mulhi_epu16(*U0, k6419); + const __m128i G1 = _mm_mulhi_epu16(*V0, k13320); + const __m128i G2 = _mm_add_epi16(Y1, k8708); + const __m128i G3 = _mm_add_epi16(G0, G1); + const __m128i G4 = _mm_sub_epi16(G2, G3); + + // be careful with the saturated *unsigned* arithmetic here! + const __m128i B0 = _mm_mulhi_epu16(*U0, k33050); + const __m128i B1 = _mm_adds_epu16(B0, Y1); + const __m128i B2 = _mm_subs_epu16(B1, k17685); + + // use logical shift for B2, which can be larger than 32767 + *R = _mm_srai_epi16(R2, 6); // range: [-14234, 30815] + *G = _mm_srai_epi16(G4, 6); // range: [-10953, 27710] + *B = _mm_srli_epi16(B2, 6); // range: [0, 34238] +} + +// Load the bytes into the *upper* part of 16b words. That's "<< 8", basically. +static WEBP_INLINE __m128i Load_HI_16_SSE41(const uint8_t* src) { + const __m128i zero = _mm_setzero_si128(); + return _mm_unpacklo_epi8(zero, _mm_loadl_epi64((const __m128i*)src)); +} + +// Load and replicate the U/V samples +static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) { + const __m128i zero = _mm_setzero_si128(); + const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src); + const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0); + return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples +} + +// Convert 32 samples of YUV444 to R/G/B +static void YUV444ToRGB_SSE41(const uint8_t* const y, + const uint8_t* const u, + const uint8_t* const v, + __m128i* const R, __m128i* const G, + __m128i* const B) { + const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_HI_16_SSE41(u), + V0 = Load_HI_16_SSE41(v); + ConvertYUV444ToRGB_SSE41(&Y0, &U0, &V0, R, G, B); +} + +// Convert 32 samples of YUV420 to R/G/B +static void YUV420ToRGB_SSE41(const uint8_t* const y, + const uint8_t* const u, + const uint8_t* const v, + __m128i* const R, __m128i* const G, + __m128i* const B) { + const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_UV_HI_8_SSE41(u), + V0 = Load_UV_HI_8_SSE41(v); + ConvertYUV444ToRGB_SSE41(&Y0, &U0, &V0, R, G, B); +} + +// Pack the planar buffers +// rrrr... rrrr... gggg... gggg... bbbb... bbbb.... +// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ... +static WEBP_INLINE void PlanarTo24b_SSE41( + __m128i* const in0, __m128i* const in1, __m128i* const in2, + __m128i* const in3, __m128i* const in4, __m128i* const in5, + uint8_t* const rgb) { + // The input is 6 registers of sixteen 8b but for the sake of explanation, + // let's take 6 registers of four 8b values. 
+ // To pack, we will keep taking one every two 8b integer and move it + // around as follows: + // Input: + // r0r1r2r3 | r4r5r6r7 | g0g1g2g3 | g4g5g6g7 | b0b1b2b3 | b4b5b6b7 + // Split the 6 registers in two sets of 3 registers: the first set as the even + // 8b bytes, the second the odd ones: + // r0r2r4r6 | g0g2g4g6 | b0b2b4b6 | r1r3r5r7 | g1g3g5g7 | b1b3b5b7 + // Repeat the same permutations twice more: + // r0r4g0g4 | b0b4r1r5 | g1g5b1b5 | r2r6g2g6 | b2b6r3r7 | g3g7b3b7 + // r0g0b0r1 | g1b1r2g2 | b2r3g3b3 | r4g4b4r5 | g5b5r6g6 | b6r7g7b7 + VP8PlanarTo24b_SSE41(in0, in1, in2, in3, in4, in5); + + _mm_storeu_si128((__m128i*)(rgb + 0), *in0); + _mm_storeu_si128((__m128i*)(rgb + 16), *in1); + _mm_storeu_si128((__m128i*)(rgb + 32), *in2); + _mm_storeu_si128((__m128i*)(rgb + 48), *in3); + _mm_storeu_si128((__m128i*)(rgb + 64), *in4); + _mm_storeu_si128((__m128i*)(rgb + 80), *in5); +} + +void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst) { + __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; + __m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5; + + YUV444ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0); + YUV444ToRGB_SSE41(y + 8, u + 8, v + 8, &R1, &G1, &B1); + YUV444ToRGB_SSE41(y + 16, u + 16, v + 16, &R2, &G2, &B2); + YUV444ToRGB_SSE41(y + 24, u + 24, v + 24, &R3, &G3, &B3); + + // Cast to 8b and store as RRRRGGGGBBBB. + rgb0 = _mm_packus_epi16(R0, R1); + rgb1 = _mm_packus_epi16(R2, R3); + rgb2 = _mm_packus_epi16(G0, G1); + rgb3 = _mm_packus_epi16(G2, G3); + rgb4 = _mm_packus_epi16(B0, B1); + rgb5 = _mm_packus_epi16(B2, B3); + + // Pack as RGBRGBRGBRGB. + PlanarTo24b_SSE41(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst); +} + +void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v, + uint8_t* dst) { + __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; + __m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5; + + YUV444ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0); + YUV444ToRGB_SSE41(y + 8, u + 8, v + 8, &R1, &G1, &B1); + YUV444ToRGB_SSE41(y + 16, u + 16, v + 16, &R2, &G2, &B2); + YUV444ToRGB_SSE41(y + 24, u + 24, v + 24, &R3, &G3, &B3); + + // Cast to 8b and store as BBBBGGGGRRRR. + bgr0 = _mm_packus_epi16(B0, B1); + bgr1 = _mm_packus_epi16(B2, B3); + bgr2 = _mm_packus_epi16(G0, G1); + bgr3 = _mm_packus_epi16(G2, G3); + bgr4 = _mm_packus_epi16(R0, R1); + bgr5= _mm_packus_epi16(R2, R3); + + // Pack as BGRBGRBGRBGR. + PlanarTo24b_SSE41(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst); +} + +//----------------------------------------------------------------------------- +// Arbitrary-length row conversion functions + +static void YuvToRgbRow_SSE41(const uint8_t* y, + const uint8_t* u, const uint8_t* v, + uint8_t* dst, int len) { + int n; + for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) { + __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; + __m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5; + + YUV420ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0); + YUV420ToRGB_SSE41(y + 8, u + 4, v + 4, &R1, &G1, &B1); + YUV420ToRGB_SSE41(y + 16, u + 8, v + 8, &R2, &G2, &B2); + YUV420ToRGB_SSE41(y + 24, u + 12, v + 12, &R3, &G3, &B3); + + // Cast to 8b and store as RRRRGGGGBBBB. + rgb0 = _mm_packus_epi16(R0, R1); + rgb1 = _mm_packus_epi16(R2, R3); + rgb2 = _mm_packus_epi16(G0, G1); + rgb3 = _mm_packus_epi16(G2, G3); + rgb4 = _mm_packus_epi16(B0, B1); + rgb5 = _mm_packus_epi16(B2, B3); + + // Pack as RGBRGBRGBRGB. 
+ PlanarTo24b_SSE41(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst); + + y += 32; + u += 16; + v += 16; + } + for (; n < len; ++n) { // Finish off + VP8YuvToRgb(y[0], u[0], v[0], dst); + dst += 3; + y += 1; + u += (n & 1); + v += (n & 1); + } +} + +static void YuvToBgrRow_SSE41(const uint8_t* y, + const uint8_t* u, const uint8_t* v, + uint8_t* dst, int len) { + int n; + for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) { + __m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3; + __m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5; + + YUV420ToRGB_SSE41(y + 0, u + 0, v + 0, &R0, &G0, &B0); + YUV420ToRGB_SSE41(y + 8, u + 4, v + 4, &R1, &G1, &B1); + YUV420ToRGB_SSE41(y + 16, u + 8, v + 8, &R2, &G2, &B2); + YUV420ToRGB_SSE41(y + 24, u + 12, v + 12, &R3, &G3, &B3); + + // Cast to 8b and store as BBBBGGGGRRRR. + bgr0 = _mm_packus_epi16(B0, B1); + bgr1 = _mm_packus_epi16(B2, B3); + bgr2 = _mm_packus_epi16(G0, G1); + bgr3 = _mm_packus_epi16(G2, G3); + bgr4 = _mm_packus_epi16(R0, R1); + bgr5 = _mm_packus_epi16(R2, R3); + + // Pack as BGRBGRBGRBGR. + PlanarTo24b_SSE41(&bgr0, &bgr1, &bgr2, &bgr3, &bgr4, &bgr5, dst); + + y += 32; + u += 16; + v += 16; + } + for (; n < len; ++n) { // Finish off + VP8YuvToBgr(y[0], u[0], v[0], dst); + dst += 3; + y += 1; + u += (n & 1); + v += (n & 1); + } +} + +//------------------------------------------------------------------------------ +// Entry point + +extern void WebPInitSamplersSSE41(void); + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE41(void) { + WebPSamplers[MODE_RGB] = YuvToRgbRow_SSE41; + WebPSamplers[MODE_BGR] = YuvToBgrRow_SSE41; +} + +//------------------------------------------------------------------------------ +// RGB24/32 -> YUV converters + +// Load eight 16b-words from *src. +#define LOAD_16(src) _mm_loadu_si128((const __m128i*)(src)) +// Store either 16b-words into *dst +#define STORE_16(V, dst) _mm_storeu_si128((__m128i*)(dst), (V)) + +#define WEBP_SSE41_SHUFF(OUT) do { \ + const __m128i tmp0 = _mm_shuffle_epi8(A0, shuff0); \ + const __m128i tmp1 = _mm_shuffle_epi8(A1, shuff1); \ + const __m128i tmp2 = _mm_shuffle_epi8(A2, shuff2); \ + const __m128i tmp3 = _mm_shuffle_epi8(A3, shuff0); \ + const __m128i tmp4 = _mm_shuffle_epi8(A4, shuff1); \ + const __m128i tmp5 = _mm_shuffle_epi8(A5, shuff2); \ + \ + /* OR everything to get one channel */ \ + const __m128i tmp6 = _mm_or_si128(tmp0, tmp1); \ + const __m128i tmp7 = _mm_or_si128(tmp3, tmp4); \ + out[OUT + 0] = _mm_or_si128(tmp6, tmp2); \ + out[OUT + 1] = _mm_or_si128(tmp7, tmp5); \ +} while (0); + +// Unpack the 8b input rgbrgbrgbrgb ... as contiguous registers: +// rrrr... rrrr... gggg... gggg... bbbb... bbbb.... +// Similar to PlanarTo24bHelper(), but in reverse order. +static WEBP_INLINE void RGB24PackedToPlanar_SSE41( + const uint8_t* const rgb, __m128i* const out /*out[6]*/) { + const __m128i A0 = _mm_loadu_si128((const __m128i*)(rgb + 0)); + const __m128i A1 = _mm_loadu_si128((const __m128i*)(rgb + 16)); + const __m128i A2 = _mm_loadu_si128((const __m128i*)(rgb + 32)); + const __m128i A3 = _mm_loadu_si128((const __m128i*)(rgb + 48)); + const __m128i A4 = _mm_loadu_si128((const __m128i*)(rgb + 64)); + const __m128i A5 = _mm_loadu_si128((const __m128i*)(rgb + 80)); + + // Compute RR. 
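  // (How the three masks cooperate: within each 16-byte load the red samples
  //  sit at every third byte, so shuff0 picks bytes 0, 3, ..., 15 of A0/A3,
  //  shuff1 picks bytes 2, 5, ..., 14 of A1/A4, and shuff2 picks bytes
  //  1, 4, ..., 13 of A2/A5, each into a disjoint range of destination bytes.
  //  OR-ing the three shuffles inside WEBP_SSE41_SHUFF() therefore yields 16
  //  consecutive red samples per output register. The GG and BB blocks below
  //  use the same scheme shifted by one and two source bytes.)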
+ { + const __m128i shuff0 = _mm_set_epi8( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0); + const __m128i shuff1 = _mm_set_epi8( + -1, -1, -1, -1, -1, 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1); + const __m128i shuff2 = _mm_set_epi8( + 13, 10, 7, 4, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + WEBP_SSE41_SHUFF(0) + } + // Compute GG. + { + const __m128i shuff0 = _mm_set_epi8( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1); + const __m128i shuff1 = _mm_set_epi8( + -1, -1, -1, -1, -1, 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1); + const __m128i shuff2 = _mm_set_epi8( + 14, 11, 8, 5, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + WEBP_SSE41_SHUFF(2) + } + // Compute BB. + { + const __m128i shuff0 = _mm_set_epi8( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 14, 11, 8, 5, 2); + const __m128i shuff1 = _mm_set_epi8( + -1, -1, -1, -1, -1, -1, 13, 10, 7, 4, 1, -1, -1, -1, -1, -1); + const __m128i shuff2 = _mm_set_epi8( + 15, 12, 9, 6, 3, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + WEBP_SSE41_SHUFF(4) + } +} + +#undef WEBP_SSE41_SHUFF + +// Convert 8 packed ARGB to r[], g[], b[] +static WEBP_INLINE void RGB32PackedToPlanar_SSE41( + const uint32_t* const argb, __m128i* const rgb /*in[6]*/) { + const __m128i zero = _mm_setzero_si128(); + __m128i a0 = LOAD_16(argb + 0); + __m128i a1 = LOAD_16(argb + 4); + __m128i a2 = LOAD_16(argb + 8); + __m128i a3 = LOAD_16(argb + 12); + VP8L32bToPlanar_SSE41(&a0, &a1, &a2, &a3); + rgb[0] = _mm_unpacklo_epi8(a1, zero); + rgb[1] = _mm_unpackhi_epi8(a1, zero); + rgb[2] = _mm_unpacklo_epi8(a2, zero); + rgb[3] = _mm_unpackhi_epi8(a2, zero); + rgb[4] = _mm_unpacklo_epi8(a3, zero); + rgb[5] = _mm_unpackhi_epi8(a3, zero); +} + +// This macro computes (RG * MULT_RG + GB * MULT_GB + ROUNDER) >> DESCALE_FIX +// It's a macro and not a function because we need to use immediate values with +// srai_epi32, e.g. 
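// (In this file DESCALE_FIX is always a compile-time constant: YUV_FIX for
//  the Y transform in ConvertRGBToY_SSE41() and YUV_FIX + 2 for the U/V
//  transforms in ConvertRGBToUV_SSE41() below.)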
+#define TRANSFORM(RG_LO, RG_HI, GB_LO, GB_HI, MULT_RG, MULT_GB, \ + ROUNDER, DESCALE_FIX, OUT) do { \ + const __m128i V0_lo = _mm_madd_epi16(RG_LO, MULT_RG); \ + const __m128i V0_hi = _mm_madd_epi16(RG_HI, MULT_RG); \ + const __m128i V1_lo = _mm_madd_epi16(GB_LO, MULT_GB); \ + const __m128i V1_hi = _mm_madd_epi16(GB_HI, MULT_GB); \ + const __m128i V2_lo = _mm_add_epi32(V0_lo, V1_lo); \ + const __m128i V2_hi = _mm_add_epi32(V0_hi, V1_hi); \ + const __m128i V3_lo = _mm_add_epi32(V2_lo, ROUNDER); \ + const __m128i V3_hi = _mm_add_epi32(V2_hi, ROUNDER); \ + const __m128i V5_lo = _mm_srai_epi32(V3_lo, DESCALE_FIX); \ + const __m128i V5_hi = _mm_srai_epi32(V3_hi, DESCALE_FIX); \ + (OUT) = _mm_packs_epi32(V5_lo, V5_hi); \ +} while (0) + +#define MK_CST_16(A, B) _mm_set_epi16((B), (A), (B), (A), (B), (A), (B), (A)) +static WEBP_INLINE void ConvertRGBToY_SSE41(const __m128i* const R, + const __m128i* const G, + const __m128i* const B, + __m128i* const Y) { + const __m128i kRG_y = MK_CST_16(16839, 33059 - 16384); + const __m128i kGB_y = MK_CST_16(16384, 6420); + const __m128i kHALF_Y = _mm_set1_epi32((16 << YUV_FIX) + YUV_HALF); + + const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G); + const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G); + const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B); + const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B); + TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_y, kGB_y, kHALF_Y, YUV_FIX, *Y); +} + +static WEBP_INLINE void ConvertRGBToUV_SSE41(const __m128i* const R, + const __m128i* const G, + const __m128i* const B, + __m128i* const U, + __m128i* const V) { + const __m128i kRG_u = MK_CST_16(-9719, -19081); + const __m128i kGB_u = MK_CST_16(0, 28800); + const __m128i kRG_v = MK_CST_16(28800, 0); + const __m128i kGB_v = MK_CST_16(-24116, -4684); + const __m128i kHALF_UV = _mm_set1_epi32(((128 << YUV_FIX) + YUV_HALF) << 2); + + const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G); + const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G); + const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B); + const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B); + TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_u, kGB_u, + kHALF_UV, YUV_FIX + 2, *U); + TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_v, kGB_v, + kHALF_UV, YUV_FIX + 2, *V); +} + +#undef MK_CST_16 +#undef TRANSFORM + +static void ConvertRGB24ToY_SSE41(const uint8_t* rgb, uint8_t* y, int width) { + const int max_width = width & ~31; + int i; + for (i = 0; i < max_width; rgb += 3 * 16 * 2) { + __m128i rgb_plane[6]; + int j; + + RGB24PackedToPlanar_SSE41(rgb, rgb_plane); + + for (j = 0; j < 2; ++j, i += 16) { + const __m128i zero = _mm_setzero_si128(); + __m128i r, g, b, Y0, Y1; + + // Convert to 16-bit Y. + r = _mm_unpacklo_epi8(rgb_plane[0 + j], zero); + g = _mm_unpacklo_epi8(rgb_plane[2 + j], zero); + b = _mm_unpacklo_epi8(rgb_plane[4 + j], zero); + ConvertRGBToY_SSE41(&r, &g, &b, &Y0); + + // Convert to 16-bit Y. + r = _mm_unpackhi_epi8(rgb_plane[0 + j], zero); + g = _mm_unpackhi_epi8(rgb_plane[2 + j], zero); + b = _mm_unpackhi_epi8(rgb_plane[4 + j], zero); + ConvertRGBToY_SSE41(&r, &g, &b, &Y1); + + // Cast to 8-bit and store. 
+ STORE_16(_mm_packus_epi16(Y0, Y1), y + i); + } + } + for (; i < width; ++i, rgb += 3) { // left-over + y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF); + } +} + +static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) { + const int max_width = width & ~31; + int i; + for (i = 0; i < max_width; bgr += 3 * 16 * 2) { + __m128i bgr_plane[6]; + int j; + + RGB24PackedToPlanar_SSE41(bgr, bgr_plane); + + for (j = 0; j < 2; ++j, i += 16) { + const __m128i zero = _mm_setzero_si128(); + __m128i r, g, b, Y0, Y1; + + // Convert to 16-bit Y. + b = _mm_unpacklo_epi8(bgr_plane[0 + j], zero); + g = _mm_unpacklo_epi8(bgr_plane[2 + j], zero); + r = _mm_unpacklo_epi8(bgr_plane[4 + j], zero); + ConvertRGBToY_SSE41(&r, &g, &b, &Y0); + + // Convert to 16-bit Y. + b = _mm_unpackhi_epi8(bgr_plane[0 + j], zero); + g = _mm_unpackhi_epi8(bgr_plane[2 + j], zero); + r = _mm_unpackhi_epi8(bgr_plane[4 + j], zero); + ConvertRGBToY_SSE41(&r, &g, &b, &Y1); + + // Cast to 8-bit and store. + STORE_16(_mm_packus_epi16(Y0, Y1), y + i); + } + } + for (; i < width; ++i, bgr += 3) { // left-over + y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF); + } +} + +static void ConvertARGBToY_SSE41(const uint32_t* argb, uint8_t* y, int width) { + const int max_width = width & ~15; + int i; + for (i = 0; i < max_width; i += 16) { + __m128i Y0, Y1, rgb[6]; + RGB32PackedToPlanar_SSE41(&argb[i], rgb); + ConvertRGBToY_SSE41(&rgb[0], &rgb[2], &rgb[4], &Y0); + ConvertRGBToY_SSE41(&rgb[1], &rgb[3], &rgb[5], &Y1); + STORE_16(_mm_packus_epi16(Y0, Y1), y + i); + } + for (; i < width; ++i) { // left-over + const uint32_t p = argb[i]; + y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff, + YUV_HALF); + } +} + +// Horizontal add (doubled) of two 16b values, result is 16b. +// in: A | B | C | D | ... -> out: 2*(A+B) | 2*(C+D) | ... 
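// (_mm_madd_epi16() with the constant 2 multiplies every 16-bit lane by 2 and
//  adds adjacent products, so each 32-bit lane of C/D holds 2*(A[2i]+A[2i+1]);
//  _mm_packs_epi32() then narrows both results back to saturated 16-bit lanes.
//  ConvertARGBToUV_SSE41() below uses this to sum the R/G/B values of
//  horizontally adjacent pixels before converting them to U/V.)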
+static void HorizontalAddPack_SSE41(const __m128i* const A, + const __m128i* const B, + __m128i* const out) { + const __m128i k2 = _mm_set1_epi16(2); + const __m128i C = _mm_madd_epi16(*A, k2); + const __m128i D = _mm_madd_epi16(*B, k2); + *out = _mm_packs_epi32(C, D); +} + +static void ConvertARGBToUV_SSE41(const uint32_t* argb, + uint8_t* u, uint8_t* v, + int src_width, int do_store) { + const int max_width = src_width & ~31; + int i; + for (i = 0; i < max_width; i += 32, u += 16, v += 16) { + __m128i rgb[6], U0, V0, U1, V1; + RGB32PackedToPlanar_SSE41(&argb[i], rgb); + HorizontalAddPack_SSE41(&rgb[0], &rgb[1], &rgb[0]); + HorizontalAddPack_SSE41(&rgb[2], &rgb[3], &rgb[2]); + HorizontalAddPack_SSE41(&rgb[4], &rgb[5], &rgb[4]); + ConvertRGBToUV_SSE41(&rgb[0], &rgb[2], &rgb[4], &U0, &V0); + + RGB32PackedToPlanar_SSE41(&argb[i + 16], rgb); + HorizontalAddPack_SSE41(&rgb[0], &rgb[1], &rgb[0]); + HorizontalAddPack_SSE41(&rgb[2], &rgb[3], &rgb[2]); + HorizontalAddPack_SSE41(&rgb[4], &rgb[5], &rgb[4]); + ConvertRGBToUV_SSE41(&rgb[0], &rgb[2], &rgb[4], &U1, &V1); + + U0 = _mm_packus_epi16(U0, U1); + V0 = _mm_packus_epi16(V0, V1); + if (!do_store) { + const __m128i prev_u = LOAD_16(u); + const __m128i prev_v = LOAD_16(v); + U0 = _mm_avg_epu8(U0, prev_u); + V0 = _mm_avg_epu8(V0, prev_v); + } + STORE_16(U0, u); + STORE_16(V0, v); + } + if (i < src_width) { // left-over + WebPConvertARGBToUV_C(argb + i, u, v, src_width - i, do_store); + } +} + +// Convert 16 packed ARGB 16b-values to r[], g[], b[] +static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41( + const uint16_t* const rgbx, + __m128i* const r, __m128i* const g, __m128i* const b) { + const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x + const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x + const __m128i in2 = LOAD_16(rgbx + 16); // r4 | ... + const __m128i in3 = LOAD_16(rgbx + 24); // r6 | ... + // aarrggbb as 16-bit. + const __m128i shuff0 = + _mm_set_epi8(-1, -1, -1, -1, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0); + const __m128i shuff1 = + _mm_set_epi8(13, 12, 5, 4, -1, -1, -1, -1, 11, 10, 3, 2, 9, 8, 1, 0); + const __m128i A0 = _mm_shuffle_epi8(in0, shuff0); + const __m128i A1 = _mm_shuffle_epi8(in1, shuff1); + const __m128i A2 = _mm_shuffle_epi8(in2, shuff0); + const __m128i A3 = _mm_shuffle_epi8(in3, shuff1); + // R0R1G0G1 + // B0B1**** + // R2R3G2G3 + // B2B3**** + // (OR is used to free port 5 for the unpack) + const __m128i B0 = _mm_unpacklo_epi32(A0, A1); + const __m128i B1 = _mm_or_si128(A0, A1); + const __m128i B2 = _mm_unpacklo_epi32(A2, A3); + const __m128i B3 = _mm_or_si128(A2, A3); + // Gather the channels. 
+ *r = _mm_unpacklo_epi64(B0, B2); + *g = _mm_unpackhi_epi64(B0, B2); + *b = _mm_unpackhi_epi64(B1, B3); +} + +static void ConvertRGBA32ToUV_SSE41(const uint16_t* rgb, + uint8_t* u, uint8_t* v, int width) { + const int max_width = width & ~15; + const uint16_t* const last_rgb = rgb + 4 * max_width; + while (rgb < last_rgb) { + __m128i r, g, b, U0, V0, U1, V1; + RGBA32PackedToPlanar_16b_SSE41(rgb + 0, &r, &g, &b); + ConvertRGBToUV_SSE41(&r, &g, &b, &U0, &V0); + RGBA32PackedToPlanar_16b_SSE41(rgb + 32, &r, &g, &b); + ConvertRGBToUV_SSE41(&r, &g, &b, &U1, &V1); + STORE_16(_mm_packus_epi16(U0, U1), u); + STORE_16(_mm_packus_epi16(V0, V1), v); + u += 16; + v += 16; + rgb += 2 * 32; + } + if (max_width < width) { // left-over + WebPConvertRGBA32ToUV_C(rgb, u, v, width - max_width); + } +} + +//------------------------------------------------------------------------------ + +extern void WebPInitConvertARGBToYUVSSE41(void); + +WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE41(void) { + WebPConvertARGBToY = ConvertARGBToY_SSE41; + WebPConvertARGBToUV = ConvertARGBToUV_SSE41; + + WebPConvertRGB24ToY = ConvertRGB24ToY_SSE41; + WebPConvertBGR24ToY = ConvertBGR24ToY_SSE41; + + WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE41; +} + +//------------------------------------------------------------------------------ + +#else // !WEBP_USE_SSE41 + +WEBP_DSP_INIT_STUB(WebPInitSamplersSSE41) +WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE41) + +#endif // WEBP_USE_SSE41 diff --git a/3rdparty/libwebp/src/enc/alpha_enc.c b/3rdparty/libwebp/src/enc/alpha_enc.c index 7e8d87f22e..dce9ca957d 100644 --- a/3rdparty/libwebp/src/enc/alpha_enc.c +++ b/3rdparty/libwebp/src/enc/alpha_enc.c @@ -361,7 +361,8 @@ static int EncodeAlpha(VP8Encoder* const enc, //------------------------------------------------------------------------------ // Main calls -static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) { +static int CompressAlphaJob(void* arg1, void* dummy) { + VP8Encoder* const enc = (VP8Encoder*)arg1; const WebPConfig* config = enc->config_; uint8_t* alpha_data = NULL; size_t alpha_size = 0; @@ -394,7 +395,7 @@ void VP8EncInitAlpha(VP8Encoder* const enc) { WebPGetWorkerInterface()->Init(worker); worker->data1 = enc; worker->data2 = NULL; - worker->hook = (WebPWorkerHook)CompressAlphaJob; + worker->hook = CompressAlphaJob; } } diff --git a/3rdparty/libwebp/src/enc/analysis_enc.c b/3rdparty/libwebp/src/enc/analysis_enc.c index 08f471f5f8..a47ff7d4e8 100644 --- a/3rdparty/libwebp/src/enc/analysis_enc.c +++ b/3rdparty/libwebp/src/enc/analysis_enc.c @@ -434,7 +434,9 @@ typedef struct { } SegmentJob; // main work call -static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) { +static int DoSegmentsJob(void* arg1, void* arg2) { + SegmentJob* const job = (SegmentJob*)arg1; + VP8EncIterator* const it = (VP8EncIterator*)arg2; int ok = 1; if (!VP8IteratorIsDone(it)) { uint8_t tmp[32 + WEBP_ALIGN_CST]; @@ -462,7 +464,7 @@ static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, WebPGetWorkerInterface()->Init(&job->worker); job->worker.data1 = job; job->worker.data2 = &job->it; - job->worker.hook = (WebPWorkerHook)DoSegmentsJob; + job->worker.hook = DoSegmentsJob; VP8IteratorInit(enc, &job->it); VP8IteratorSetRow(&job->it, start_row); VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_); diff --git a/3rdparty/libwebp/src/enc/delta_palettization_enc.c b/3rdparty/libwebp/src/enc/delta_palettization_enc.c deleted file mode 100644 index a61c8e6c93..0000000000 --- 
a/3rdparty/libwebp/src/enc/delta_palettization_enc.c +++ /dev/null @@ -1,455 +0,0 @@ -// Copyright 2015 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Author: Mislav Bradac (mislavm@google.com) -// - -#include "src/enc/delta_palettization_enc.h" - -#ifdef WEBP_EXPERIMENTAL_FEATURES -#include "src/webp/types.h" -#include "src/dsp/lossless.h" - -#define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b)) - -// Format allows palette up to 256 entries, but more palette entries produce -// bigger entropy. In the future it will probably be useful to add more entries -// that are far from the origin of the palette or choose remaining entries -// dynamically. -#define DELTA_PALETTE_SIZE 226 - -// Palette used for delta_palettization. Entries are roughly sorted by distance -// of their signed equivalents from the origin. -static const uint32_t kDeltaPalette[DELTA_PALETTE_SIZE] = { - MK_COL(0u, 0u, 0u), - MK_COL(255u, 255u, 255u), - MK_COL(1u, 1u, 1u), - MK_COL(254u, 254u, 254u), - MK_COL(2u, 2u, 2u), - MK_COL(4u, 4u, 4u), - MK_COL(252u, 252u, 252u), - MK_COL(250u, 0u, 0u), - MK_COL(0u, 250u, 0u), - MK_COL(0u, 0u, 250u), - MK_COL(6u, 0u, 0u), - MK_COL(0u, 6u, 0u), - MK_COL(0u, 0u, 6u), - MK_COL(0u, 0u, 248u), - MK_COL(0u, 0u, 8u), - MK_COL(0u, 248u, 0u), - MK_COL(0u, 248u, 248u), - MK_COL(0u, 248u, 8u), - MK_COL(0u, 8u, 0u), - MK_COL(0u, 8u, 248u), - MK_COL(0u, 8u, 8u), - MK_COL(8u, 8u, 8u), - MK_COL(248u, 0u, 0u), - MK_COL(248u, 0u, 248u), - MK_COL(248u, 0u, 8u), - MK_COL(248u, 248u, 0u), - MK_COL(248u, 8u, 0u), - MK_COL(8u, 0u, 0u), - MK_COL(8u, 0u, 248u), - MK_COL(8u, 0u, 8u), - MK_COL(8u, 248u, 0u), - MK_COL(8u, 8u, 0u), - MK_COL(23u, 23u, 23u), - MK_COL(13u, 13u, 13u), - MK_COL(232u, 232u, 232u), - MK_COL(244u, 244u, 244u), - MK_COL(245u, 245u, 250u), - MK_COL(50u, 50u, 50u), - MK_COL(204u, 204u, 204u), - MK_COL(236u, 236u, 236u), - MK_COL(16u, 16u, 16u), - MK_COL(240u, 16u, 16u), - MK_COL(16u, 240u, 16u), - MK_COL(240u, 240u, 16u), - MK_COL(16u, 16u, 240u), - MK_COL(240u, 16u, 240u), - MK_COL(16u, 240u, 240u), - MK_COL(240u, 240u, 240u), - MK_COL(0u, 0u, 232u), - MK_COL(0u, 232u, 0u), - MK_COL(232u, 0u, 0u), - MK_COL(0u, 0u, 24u), - MK_COL(0u, 24u, 0u), - MK_COL(24u, 0u, 0u), - MK_COL(32u, 32u, 32u), - MK_COL(224u, 32u, 32u), - MK_COL(32u, 224u, 32u), - MK_COL(224u, 224u, 32u), - MK_COL(32u, 32u, 224u), - MK_COL(224u, 32u, 224u), - MK_COL(32u, 224u, 224u), - MK_COL(224u, 224u, 224u), - MK_COL(0u, 0u, 176u), - MK_COL(0u, 0u, 80u), - MK_COL(0u, 176u, 0u), - MK_COL(0u, 176u, 176u), - MK_COL(0u, 176u, 80u), - MK_COL(0u, 80u, 0u), - MK_COL(0u, 80u, 176u), - MK_COL(0u, 80u, 80u), - MK_COL(176u, 0u, 0u), - MK_COL(176u, 0u, 176u), - MK_COL(176u, 0u, 80u), - MK_COL(176u, 176u, 0u), - MK_COL(176u, 80u, 0u), - MK_COL(80u, 0u, 0u), - MK_COL(80u, 0u, 176u), - MK_COL(80u, 0u, 80u), - MK_COL(80u, 176u, 0u), - MK_COL(80u, 80u, 0u), - MK_COL(0u, 0u, 152u), - MK_COL(0u, 0u, 104u), - MK_COL(0u, 152u, 0u), - MK_COL(0u, 152u, 152u), - MK_COL(0u, 152u, 104u), - MK_COL(0u, 104u, 0u), - MK_COL(0u, 104u, 152u), - MK_COL(0u, 104u, 104u), - MK_COL(152u, 0u, 0u), - MK_COL(152u, 0u, 152u), - MK_COL(152u, 0u, 104u), - 
MK_COL(152u, 152u, 0u), - MK_COL(152u, 104u, 0u), - MK_COL(104u, 0u, 0u), - MK_COL(104u, 0u, 152u), - MK_COL(104u, 0u, 104u), - MK_COL(104u, 152u, 0u), - MK_COL(104u, 104u, 0u), - MK_COL(216u, 216u, 216u), - MK_COL(216u, 216u, 40u), - MK_COL(216u, 216u, 176u), - MK_COL(216u, 216u, 80u), - MK_COL(216u, 40u, 216u), - MK_COL(216u, 40u, 40u), - MK_COL(216u, 40u, 176u), - MK_COL(216u, 40u, 80u), - MK_COL(216u, 176u, 216u), - MK_COL(216u, 176u, 40u), - MK_COL(216u, 176u, 176u), - MK_COL(216u, 176u, 80u), - MK_COL(216u, 80u, 216u), - MK_COL(216u, 80u, 40u), - MK_COL(216u, 80u, 176u), - MK_COL(216u, 80u, 80u), - MK_COL(40u, 216u, 216u), - MK_COL(40u, 216u, 40u), - MK_COL(40u, 216u, 176u), - MK_COL(40u, 216u, 80u), - MK_COL(40u, 40u, 216u), - MK_COL(40u, 40u, 40u), - MK_COL(40u, 40u, 176u), - MK_COL(40u, 40u, 80u), - MK_COL(40u, 176u, 216u), - MK_COL(40u, 176u, 40u), - MK_COL(40u, 176u, 176u), - MK_COL(40u, 176u, 80u), - MK_COL(40u, 80u, 216u), - MK_COL(40u, 80u, 40u), - MK_COL(40u, 80u, 176u), - MK_COL(40u, 80u, 80u), - MK_COL(80u, 216u, 216u), - MK_COL(80u, 216u, 40u), - MK_COL(80u, 216u, 176u), - MK_COL(80u, 216u, 80u), - MK_COL(80u, 40u, 216u), - MK_COL(80u, 40u, 40u), - MK_COL(80u, 40u, 176u), - MK_COL(80u, 40u, 80u), - MK_COL(80u, 176u, 216u), - MK_COL(80u, 176u, 40u), - MK_COL(80u, 176u, 176u), - MK_COL(80u, 176u, 80u), - MK_COL(80u, 80u, 216u), - MK_COL(80u, 80u, 40u), - MK_COL(80u, 80u, 176u), - MK_COL(80u, 80u, 80u), - MK_COL(0u, 0u, 192u), - MK_COL(0u, 0u, 64u), - MK_COL(0u, 0u, 128u), - MK_COL(0u, 192u, 0u), - MK_COL(0u, 192u, 192u), - MK_COL(0u, 192u, 64u), - MK_COL(0u, 192u, 128u), - MK_COL(0u, 64u, 0u), - MK_COL(0u, 64u, 192u), - MK_COL(0u, 64u, 64u), - MK_COL(0u, 64u, 128u), - MK_COL(0u, 128u, 0u), - MK_COL(0u, 128u, 192u), - MK_COL(0u, 128u, 64u), - MK_COL(0u, 128u, 128u), - MK_COL(176u, 216u, 216u), - MK_COL(176u, 216u, 40u), - MK_COL(176u, 216u, 176u), - MK_COL(176u, 216u, 80u), - MK_COL(176u, 40u, 216u), - MK_COL(176u, 40u, 40u), - MK_COL(176u, 40u, 176u), - MK_COL(176u, 40u, 80u), - MK_COL(176u, 176u, 216u), - MK_COL(176u, 176u, 40u), - MK_COL(176u, 176u, 176u), - MK_COL(176u, 176u, 80u), - MK_COL(176u, 80u, 216u), - MK_COL(176u, 80u, 40u), - MK_COL(176u, 80u, 176u), - MK_COL(176u, 80u, 80u), - MK_COL(192u, 0u, 0u), - MK_COL(192u, 0u, 192u), - MK_COL(192u, 0u, 64u), - MK_COL(192u, 0u, 128u), - MK_COL(192u, 192u, 0u), - MK_COL(192u, 192u, 192u), - MK_COL(192u, 192u, 64u), - MK_COL(192u, 192u, 128u), - MK_COL(192u, 64u, 0u), - MK_COL(192u, 64u, 192u), - MK_COL(192u, 64u, 64u), - MK_COL(192u, 64u, 128u), - MK_COL(192u, 128u, 0u), - MK_COL(192u, 128u, 192u), - MK_COL(192u, 128u, 64u), - MK_COL(192u, 128u, 128u), - MK_COL(64u, 0u, 0u), - MK_COL(64u, 0u, 192u), - MK_COL(64u, 0u, 64u), - MK_COL(64u, 0u, 128u), - MK_COL(64u, 192u, 0u), - MK_COL(64u, 192u, 192u), - MK_COL(64u, 192u, 64u), - MK_COL(64u, 192u, 128u), - MK_COL(64u, 64u, 0u), - MK_COL(64u, 64u, 192u), - MK_COL(64u, 64u, 64u), - MK_COL(64u, 64u, 128u), - MK_COL(64u, 128u, 0u), - MK_COL(64u, 128u, 192u), - MK_COL(64u, 128u, 64u), - MK_COL(64u, 128u, 128u), - MK_COL(128u, 0u, 0u), - MK_COL(128u, 0u, 192u), - MK_COL(128u, 0u, 64u), - MK_COL(128u, 0u, 128u), - MK_COL(128u, 192u, 0u), - MK_COL(128u, 192u, 192u), - MK_COL(128u, 192u, 64u), - MK_COL(128u, 192u, 128u), - MK_COL(128u, 64u, 0u), - MK_COL(128u, 64u, 192u), - MK_COL(128u, 64u, 64u), - MK_COL(128u, 64u, 128u), - MK_COL(128u, 128u, 0u), - MK_COL(128u, 128u, 192u), - MK_COL(128u, 128u, 64u), - MK_COL(128u, 128u, 128u), -}; - -#undef MK_COL - 
-//------------------------------------------------------------------------------ -// TODO(skal): move the functions to dsp/lossless.c when the correct -// granularity is found. For now, we'll just copy-paste some useful bits -// here instead. - -// In-place sum of each component with mod 256. -static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { - const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); - const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); - *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); -} - -static WEBP_INLINE uint32_t Clip255(uint32_t a) { - if (a < 256) { - return a; - } - // return 0, when a is a negative integer. - // return 255, when a is positive. - return ~a >> 24; -} - -// Delta palettization functions. -static WEBP_INLINE int Square(int x) { - return x * x; -} - -static WEBP_INLINE uint32_t Intensity(uint32_t a) { - return - 30 * ((a >> 16) & 0xff) + - 59 * ((a >> 8) & 0xff) + - 11 * ((a >> 0) & 0xff); -} - -static uint32_t CalcDist(uint32_t predicted_value, uint32_t actual_value, - uint32_t palette_entry) { - int i; - uint32_t distance = 0; - AddPixelsEq(&predicted_value, palette_entry); - for (i = 0; i < 32; i += 8) { - const int32_t av = (actual_value >> i) & 0xff; - const int32_t pv = (predicted_value >> i) & 0xff; - distance += Square(pv - av); - } - // We sum square of intensity difference with factor 10, but because Intensity - // returns 100 times real intensity we need to multiply differences of colors - // by 1000. - distance *= 1000u; - distance += Square(Intensity(predicted_value) - - Intensity(actual_value)); - return distance; -} - -static uint32_t Predict(int x, int y, uint32_t* image) { - const uint32_t t = (y == 0) ? ARGB_BLACK : image[x]; - const uint32_t l = (x == 0) ? ARGB_BLACK : image[x - 1]; - const uint32_t p = - (((((t >> 24) & 0xff) + ((l >> 24) & 0xff)) / 2) << 24) + - (((((t >> 16) & 0xff) + ((l >> 16) & 0xff)) / 2) << 16) + - (((((t >> 8) & 0xff) + ((l >> 8) & 0xff)) / 2) << 8) + - (((((t >> 0) & 0xff) + ((l >> 0) & 0xff)) / 2) << 0); - if (x == 0 && y == 0) return ARGB_BLACK; - if (x == 0) return t; - if (y == 0) return l; - return p; -} - -static WEBP_INLINE int AddSubtractComponentFullWithCoefficient( - int a, int b, int c) { - return Clip255(a + ((b - c) >> 2)); -} - -static WEBP_INLINE uint32_t ClampedAddSubtractFullWithCoefficient( - uint32_t c0, uint32_t c1, uint32_t c2) { - const int a = AddSubtractComponentFullWithCoefficient( - c0 >> 24, c1 >> 24, c2 >> 24); - const int r = AddSubtractComponentFullWithCoefficient((c0 >> 16) & 0xff, - (c1 >> 16) & 0xff, - (c2 >> 16) & 0xff); - const int g = AddSubtractComponentFullWithCoefficient((c0 >> 8) & 0xff, - (c1 >> 8) & 0xff, - (c2 >> 8) & 0xff); - const int b = AddSubtractComponentFullWithCoefficient( - c0 & 0xff, c1 & 0xff, c2 & 0xff); - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; -} - -//------------------------------------------------------------------------------ - -// Find palette entry with minimum error from difference of actual pixel value -// and predicted pixel value. Propagate error of pixel to its top and left pixel -// in src array. Write predicted_value + palette_entry to new_image. Return -// index of best palette entry. 
-static int FindBestPaletteEntry(uint32_t src, uint32_t predicted_value, - const uint32_t palette[], int palette_size) { - int i; - int idx = 0; - uint32_t best_distance = CalcDist(predicted_value, src, palette[0]); - for (i = 1; i < palette_size; ++i) { - const uint32_t distance = CalcDist(predicted_value, src, palette[i]); - if (distance < best_distance) { - best_distance = distance; - idx = i; - } - } - return idx; -} - -static void ApplyBestPaletteEntry(int x, int y, - uint32_t new_value, uint32_t palette_value, - uint32_t* src, int src_stride, - uint32_t* new_image) { - AddPixelsEq(&new_value, palette_value); - if (x > 0) { - src[x - 1] = ClampedAddSubtractFullWithCoefficient(src[x - 1], - new_value, src[x]); - } - if (y > 0) { - src[x - src_stride] = - ClampedAddSubtractFullWithCoefficient(src[x - src_stride], - new_value, src[x]); - } - new_image[x] = new_value; -} - -//------------------------------------------------------------------------------ -// Main entry point - -static WebPEncodingError ApplyDeltaPalette(uint32_t* src, uint32_t* dst, - uint32_t src_stride, - uint32_t dst_stride, - const uint32_t* palette, - int palette_size, - int width, int height, - int num_passes) { - int x, y; - WebPEncodingError err = VP8_ENC_OK; - uint32_t* new_image = (uint32_t*)WebPSafeMalloc(width, sizeof(*new_image)); - uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); - if (new_image == NULL || tmp_row == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - - while (num_passes--) { - uint32_t* cur_src = src; - uint32_t* cur_dst = dst; - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const uint32_t predicted_value = Predict(x, y, new_image); - tmp_row[x] = FindBestPaletteEntry(cur_src[x], predicted_value, - palette, palette_size); - ApplyBestPaletteEntry(x, y, predicted_value, palette[tmp_row[x]], - cur_src, src_stride, new_image); - } - for (x = 0; x < width; ++x) { - cur_dst[x] = palette[tmp_row[x]]; - } - cur_src += src_stride; - cur_dst += dst_stride; - } - } - Error: - WebPSafeFree(new_image); - WebPSafeFree(tmp_row); - return err; -} - -// replaces enc->argb_ by a palettizable approximation of it, -// and generates optimal enc->palette_[] -WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) { - const WebPPicture* const pic = enc->pic_; - uint32_t* src = pic->argb; - uint32_t* dst = enc->argb_; - const int width = pic->width; - const int height = pic->height; - - WebPEncodingError err = VP8_ENC_OK; - memcpy(enc->palette_, kDeltaPalette, sizeof(kDeltaPalette)); - enc->palette_[DELTA_PALETTE_SIZE - 1] = src[0] - 0xff000000u; - enc->palette_size_ = DELTA_PALETTE_SIZE; - err = ApplyDeltaPalette(src, dst, pic->argb_stride, enc->current_width_, - enc->palette_, enc->palette_size_, - width, height, 2); - if (err != VP8_ENC_OK) goto Error; - - Error: - return err; -} - -#else // !WEBP_EXPERIMENTAL_FEATURES - -WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) { - (void)enc; - return VP8_ENC_ERROR_INVALID_CONFIGURATION; -} - -#endif // WEBP_EXPERIMENTAL_FEATURES diff --git a/3rdparty/libwebp/src/enc/delta_palettization_enc.h b/3rdparty/libwebp/src/enc/delta_palettization_enc.h deleted file mode 100644 index b15e2cd487..0000000000 --- a/3rdparty/libwebp/src/enc/delta_palettization_enc.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2015 Google Inc. All Rights Reserved. 
-// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Author: Mislav Bradac (mislavm@google.com) -// - -#ifndef WEBP_ENC_DELTA_PALETTIZATION_ENC_H_ -#define WEBP_ENC_DELTA_PALETTIZATION_ENC_H_ - -#include "src/webp/encode.h" -#include "src/enc/vp8li_enc.h" - -// Replaces enc->argb_[] input by a palettizable approximation of it, -// and generates optimal enc->palette_[]. -// This function can revert enc->use_palette_ / enc->use_predict_ flag -// if delta-palettization is not producing expected saving. -WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc); - -#endif // WEBP_ENC_DELTA_PALETTIZATION_ENC_H_ diff --git a/3rdparty/libwebp/src/enc/frame_enc.c b/3rdparty/libwebp/src/enc/frame_enc.c index 2b0dc66410..1aec376e44 100644 --- a/3rdparty/libwebp/src/enc/frame_enc.c +++ b/3rdparty/libwebp/src/enc/frame_enc.c @@ -198,7 +198,7 @@ static void SetSegmentProbas(VP8Encoder* const enc) { for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) { const VP8MBInfo* const mb = &enc->mb_info_[n]; - p[mb->segment_]++; + ++p[mb->segment_]; } #if !defined(WEBP_DISABLE_STATS) if (enc->pic_->stats != NULL) { @@ -520,6 +520,14 @@ static void StoreSideInfo(const VP8EncIterator* const it) { #endif } +static void ResetSideInfo(const VP8EncIterator* const it) { + VP8Encoder* const enc = it->enc_; + WebPPicture* const pic = enc->pic_; + if (pic->stats != NULL) { + memset(enc->block_count_, 0, sizeof(enc->block_count_)); + } + ResetSSE(enc); +} #else // defined(WEBP_DISABLE_STATS) static void ResetSSE(VP8Encoder* const enc) { (void)enc; @@ -528,10 +536,16 @@ static void StoreSideInfo(const VP8EncIterator* const it) { VP8Encoder* const enc = it->enc_; WebPPicture* const pic = enc->pic_; if (pic->extra_info != NULL) { - memset(pic->extra_info, 0, - enc->mb_w_ * enc->mb_h_ * sizeof(*pic->extra_info)); + if (it->x_ == 0 && it->y_ == 0) { // only do it once, at start + memset(pic->extra_info, 0, + enc->mb_w_ * enc->mb_h_ * sizeof(*pic->extra_info)); + } } } + +static void ResetSideInfo(const VP8EncIterator* const it) { + (void)it; +} #endif // !defined(WEBP_DISABLE_STATS) static double GetPSNR(uint64_t mse, uint64_t size) { @@ -570,7 +584,7 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt, VP8IteratorImport(&it, NULL); if (VP8Decimate(&it, &info, rd_opt)) { // Just record the number of skips and act like skip_proba is not used. - enc->proba_.nb_skip_++; + ++enc->proba_.nb_skip_; } RecordResiduals(&it, &info); size += info.R + info.H; @@ -841,6 +855,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) { ++num_pass_left; enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation... 
+ if (is_last_pass) { + ResetSideInfo(&it); + } continue; // ...and start over } if (is_last_pass) { @@ -871,4 +888,3 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { #endif // DISABLE_TOKEN_BUFFER //------------------------------------------------------------------------------ - diff --git a/3rdparty/libwebp/src/enc/histogram_enc.c b/3rdparty/libwebp/src/enc/histogram_enc.c index 4d13e19fb2..9fdbc627a1 100644 --- a/3rdparty/libwebp/src/enc/histogram_enc.c +++ b/3rdparty/libwebp/src/enc/histogram_enc.c @@ -200,14 +200,9 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { } } -double VP8LBitsEntropy(const uint32_t* const array, int n, - uint32_t* const trivial_symbol) { +double VP8LBitsEntropy(const uint32_t* const array, int n) { VP8LBitEntropy entropy; VP8LBitsEntropyUnrefined(array, n, &entropy); - if (trivial_symbol != NULL) { - *trivial_symbol = - (entropy.nonzeros == 1) ? entropy.nonzero_code : VP8L_NON_TRIVIAL_SYM; - } return BitsEntropyRefine(&entropy); } @@ -1031,7 +1026,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, } } - // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also. + // TODO(vrabaud): Optimize HistogramRemap for low-effort compression mode. // Find the optimal map from original histograms to the final ones. HistogramRemap(orig_histo, image_histo, histogram_symbols); diff --git a/3rdparty/libwebp/src/enc/histogram_enc.h b/3rdparty/libwebp/src/enc/histogram_enc.h index 15b1fbda34..e8c4c83f6f 100644 --- a/3rdparty/libwebp/src/enc/histogram_enc.h +++ b/3rdparty/libwebp/src/enc/histogram_enc.h @@ -109,10 +109,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, uint16_t* const histogram_symbols); // Returns the entropy for the symbols in the input array. -// Also sets trivial_symbol to the code value, if the array has only one code -// value. Otherwise, set it to VP8L_NON_TRIVIAL_SYM. -double VP8LBitsEntropy(const uint32_t* const array, int n, - uint32_t* const trivial_symbol); +double VP8LBitsEntropy(const uint32_t* const array, int n); // Estimate how many bits the combined entropy of literals and distance // approximately maps to. 
diff --git a/3rdparty/libwebp/src/enc/iterator_enc.c b/3rdparty/libwebp/src/enc/iterator_enc.c index cfacfd2401..7c47d51272 100644 --- a/3rdparty/libwebp/src/enc/iterator_enc.c +++ b/3rdparty/libwebp/src/enc/iterator_enc.c @@ -26,6 +26,9 @@ static void InitLeft(VP8EncIterator* const it) { memset(it->u_left_, 129, 8); memset(it->v_left_, 129, 8); it->left_nz_[8] = 0; + if (it->top_derr_ != NULL) { + memset(&it->left_derr_, 0, sizeof(it->left_derr_)); + } } static void InitTop(VP8EncIterator* const it) { @@ -33,6 +36,9 @@ static void InitTop(VP8EncIterator* const it) { const size_t top_size = enc->mb_w_ * 16; memset(enc->y_top_, 127, 2 * top_size); memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_)); + if (enc->top_derr_ != NULL) { + memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_)); + } } void VP8IteratorSetRow(VP8EncIterator* const it, int y) { @@ -76,6 +82,7 @@ void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); it->u_left_ = it->y_left_ + 16 + 16; it->v_left_ = it->u_left_ + 16; + it->top_derr_ = enc->top_derr_; VP8IteratorReset(it); } @@ -450,4 +457,3 @@ int VP8IteratorRotateI4(VP8EncIterator* const it, } //------------------------------------------------------------------------------ - diff --git a/3rdparty/libwebp/src/enc/near_lossless_enc.c b/3rdparty/libwebp/src/enc/near_lossless_enc.c index cadd14c664..5517a7e271 100644 --- a/3rdparty/libwebp/src/enc/near_lossless_enc.c +++ b/3rdparty/libwebp/src/enc/near_lossless_enc.c @@ -146,6 +146,6 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality, // Define a stub to suppress compiler warnings. extern void VP8LNearLosslessStub(void); -WEBP_TSAN_IGNORE_FUNCTION void VP8LNearLosslessStub(void) {} +void VP8LNearLosslessStub(void) {} #endif // (WEBP_NEAR_LOSSLESS == 1) diff --git a/3rdparty/libwebp/src/enc/picture_csp_enc.c b/3rdparty/libwebp/src/enc/picture_csp_enc.c index d531dd0282..02d9df76d5 100644 --- a/3rdparty/libwebp/src/enc/picture_csp_enc.c +++ b/3rdparty/libwebp/src/enc/picture_csp_enc.c @@ -28,11 +28,11 @@ // If defined, use table to compute x / alpha. #define USE_INVERSE_ALPHA_TABLE -static const union { - uint32_t argb; - uint8_t bytes[4]; -} test_endian = { 0xff000000u }; -#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) +#ifdef WORDS_BIGENDIAN +#define ALPHA_OFFSET 0 // uint32_t 0xff000000 is 0xff,00,00,00 in memory +#else +#define ALPHA_OFFSET 3 // uint32_t 0xff000000 is 0x00,00,00,ff in memory +#endif //------------------------------------------------------------------------------ // Detection of non-trivial transparency @@ -61,7 +61,7 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { return CheckNonOpaque(picture->a, picture->width, picture->height, 1, picture->a_stride); } else { - const int alpha_offset = ALPHA_IS_LAST ? 
3 : 0; + const int alpha_offset = ALPHA_OFFSET; return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, picture->width, picture->height, 4, picture->argb_stride * sizeof(*picture->argb)); @@ -126,7 +126,7 @@ static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { #else -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {} +static void InitGammaTables(void) {} static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; } static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { return (int)(base_value << shift); @@ -170,29 +170,33 @@ typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W #if defined(USE_GAMMA_COMPRESSION) -// float variant of gamma-correction // We use tables of different size and precision for the Rec709 / BT2020 // transfer function. #define kGammaF (1./0.45) -static float kGammaToLinearTabF[MAX_Y_T + 1]; // size scales with Y_FIX -static float kLinearToGammaTabF[kGammaTabSize + 2]; -static volatile int kGammaTablesFOk = 0; - -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) { - if (!kGammaTablesFOk) { +static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; +#define GAMMA_TO_LINEAR_BITS 14 +static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX +static volatile int kGammaTablesSOk = 0; + +static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesS(void) { + assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values + if (!kGammaTablesSOk) { int v; const double norm = 1. / MAX_Y_T; const double scale = 1. / kGammaTabSize; const double a = 0.09929682680944; const double thresh = 0.018053968510807; + const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; for (v = 0; v <= MAX_Y_T; ++v) { const double g = norm * v; + double value; if (g <= thresh * 4.5) { - kGammaToLinearTabF[v] = (float)(g / 4.5); + value = g / 4.5; } else { const double a_rec = 1. / (1. + a); - kGammaToLinearTabF[v] = (float)pow(a_rec * (g + a), kGammaF); + value = pow(a_rec * (g + a), kGammaF); } + kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); } for (v = 0; v <= kGammaTabSize; ++v) { const double g = scale * v; @@ -202,37 +206,44 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) { } else { value = (1. + a) * pow(g, 1. 
/ kGammaF) - a; } - kLinearToGammaTabF[v] = (float)(MAX_Y_T * value); + // we already incorporate the 1/2 rounding constant here + kLinearToGammaTabS[v] = + (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1); } // to prevent small rounding errors to cause read-overflow: - kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize]; - kGammaTablesFOk = 1; + kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize]; + kGammaTablesSOk = 1; } } -static WEBP_INLINE float GammaToLinearF(int v) { - return kGammaToLinearTabF[v]; +// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS +static WEBP_INLINE uint32_t GammaToLinearS(int v) { + return kGammaToLinearTabS[v]; } -static WEBP_INLINE int LinearToGammaF(float value) { - const float v = value * kGammaTabSize; - const int tab_pos = (int)v; - const float x = v - (float)tab_pos; // fractional part - const float v0 = kLinearToGammaTabF[tab_pos + 0]; - const float v1 = kLinearToGammaTabF[tab_pos + 1]; - const float y = v1 * x + v0 * (1.f - x); // interpolate - return (int)(y + .5); +static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { + // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision + const uint32_t v = value * kGammaTabSize; + const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS; + // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision + const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part + // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1]) + const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0]; + const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1]; + // Final interpolation. Note that rounding is already included. + const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0. + const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS); + return result; } #else -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {} -static WEBP_INLINE float GammaToLinearF(int v) { - const float norm = 1.f / MAX_Y_T; - return norm * v; +static void InitGammaTablesS(void) {} +static WEBP_INLINE uint32_t GammaToLinearS(int v) { + return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T; } -static WEBP_INLINE int LinearToGammaF(float value) { - return (int)(MAX_Y_T * value + .5); +static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { + return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS; } #endif // USE_GAMMA_COMPRESSION @@ -254,26 +265,22 @@ static int RGBToGray(int r, int g, int b) { return (luma >> YUV_FIX); } -static float RGBToGrayF(float r, float g, float b) { - return (float)(0.2126 * r + 0.7152 * g + 0.0722 * b); -} - -static int ScaleDown(int a, int b, int c, int d) { - const float A = GammaToLinearF(a); - const float B = GammaToLinearF(b); - const float C = GammaToLinearF(c); - const float D = GammaToLinearF(d); - return LinearToGammaF(0.25f * (A + B + C + D)); +static uint32_t ScaleDown(int a, int b, int c, int d) { + const uint32_t A = GammaToLinearS(a); + const uint32_t B = GammaToLinearS(b); + const uint32_t C = GammaToLinearS(c); + const uint32_t D = GammaToLinearS(d); + return LinearToGammaS((A + B + C + D + 2) >> 2); } static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { int i; for (i = 0; i < w; ++i) { - const float R = GammaToLinearF(src[0 * w + i]); - const float G = GammaToLinearF(src[1 * w + i]); - const float B = GammaToLinearF(src[2 * w + i]); - const float Y = RGBToGrayF(R, G, B); - dst[i] = (fixed_y_t)LinearToGammaF(Y); + const uint32_t R = GammaToLinearS(src[0 * w + i]); + const uint32_t G = 
GammaToLinearS(src[1 * w + i]); + const uint32_t B = GammaToLinearS(src[2 * w + i]); + const uint32_t Y = RGBToGray(R, G, B); + dst[i] = (fixed_y_t)LinearToGammaS(Y); } } @@ -863,7 +870,7 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, } if (use_iterative_conversion) { - InitGammaTablesF(); + InitGammaTablesS(); if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { return 0; } @@ -990,10 +997,10 @@ static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace, return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); } else { const uint8_t* const argb = (const uint8_t*)picture->argb; - const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1; - const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2; - const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3; - const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0; + const uint8_t* const a = argb + (0 ^ ALPHA_OFFSET); + const uint8_t* const r = argb + (1 ^ ALPHA_OFFSET); + const uint8_t* const g = argb + (2 ^ ALPHA_OFFSET); + const uint8_t* const b = argb + (3 ^ ALPHA_OFFSET); picture->colorspace = WEBP_YUV420; return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, @@ -1044,7 +1051,8 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) { const int argb_stride = 4 * picture->argb_stride; uint8_t* dst = (uint8_t*)picture->argb; const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; - WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST); + WebPUpsampleLinePairFunc upsample = + WebPGetLinePairConverter(ALPHA_OFFSET > 0); // First row, with replicated top samples. upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); @@ -1087,6 +1095,7 @@ static int Import(WebPPicture* const picture, const uint8_t* rgb, int rgb_stride, int step, int swap_rb, int import_alpha) { int y; + // swap_rb -> b,g,r,a , !swap_rb -> r,g,b,a const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0); const uint8_t* g_ptr = rgb + 1; const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2); @@ -1104,19 +1113,32 @@ static int Import(WebPPicture* const picture, WebPInitAlphaProcessing(); if (import_alpha) { + // dst[] byte order is {a,r,g,b} for big-endian, {b,g,r,a} for little endian uint32_t* dst = picture->argb; - const int do_copy = - (!swap_rb && !ALPHA_IS_LAST) || (swap_rb && ALPHA_IS_LAST); + const int do_copy = (ALPHA_OFFSET == 3) && swap_rb; assert(step == 4); - for (y = 0; y < height; ++y) { - if (do_copy) { + if (do_copy) { + for (y = 0; y < height; ++y) { memcpy(dst, rgb, width * 4); - } else { + rgb += rgb_stride; + dst += picture->argb_stride; + } + } else { + for (y = 0; y < height; ++y) { +#ifdef WORDS_BIGENDIAN + // BGRA or RGBA input order. + const uint8_t* a_ptr = rgb + 3; + WebPPackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst); + r_ptr += rgb_stride; + g_ptr += rgb_stride; + b_ptr += rgb_stride; +#else // RGBA input order. Need to swap R and B. 
VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst); +#endif + rgb += rgb_stride; + dst += picture->argb_stride; } - rgb += rgb_stride; - dst += picture->argb_stride; } } else { uint32_t* dst = picture->argb; diff --git a/3rdparty/libwebp/src/enc/picture_psnr_enc.c b/3rdparty/libwebp/src/enc/picture_psnr_enc.c index 362a7c79be..1a2f0bef3e 100644 --- a/3rdparty/libwebp/src/enc/picture_psnr_enc.c +++ b/3rdparty/libwebp/src/enc/picture_psnr_enc.c @@ -18,6 +18,7 @@ #include #include +#include "src/dsp/dsp.h" #include "src/enc/vp8i_enc.h" #include "src/utils/utils.h" @@ -169,6 +170,12 @@ int WebPPlaneDistortion(const uint8_t* src, size_t src_stride, return 1; } +#ifdef WORDS_BIGENDIAN +#define BLUE_OFFSET 3 // uint32_t 0x000000ff is 0x00,00,00,ff in memory +#else +#define BLUE_OFFSET 0 // uint32_t 0x000000ff is 0xff,00,00,00 in memory +#endif + int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, int type, float results[5]) { int w, h, c; @@ -195,8 +202,10 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, float distortion; const size_t stride0 = 4 * (size_t)p0.argb_stride; const size_t stride1 = 4 * (size_t)p1.argb_stride; - if (!WebPPlaneDistortion((const uint8_t*)p0.argb + c, stride0, - (const uint8_t*)p1.argb + c, stride1, + // results are reported as BGRA + const int offset = c ^ BLUE_OFFSET; + if (!WebPPlaneDistortion((const uint8_t*)p0.argb + offset, stride0, + (const uint8_t*)p1.argb + offset, stride1, w, h, 4, type, &distortion, results + c)) { goto Error; } @@ -214,6 +223,8 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, return ok; } +#undef BLUE_OFFSET + #else // defined(WEBP_DISABLE_STATS) int WebPPlaneDistortion(const uint8_t* src, size_t src_stride, const uint8_t* ref, size_t ref_stride, diff --git a/3rdparty/libwebp/src/enc/quant_enc.c b/3rdparty/libwebp/src/enc/quant_enc.c index 3b1a3129b5..35bfaf21ef 100644 --- a/3rdparty/libwebp/src/enc/quant_enc.c +++ b/3rdparty/libwebp/src/enc/quant_enc.c @@ -826,6 +826,85 @@ static int ReconstructIntra4(VP8EncIterator* const it, return nz; } +//------------------------------------------------------------------------------ +// DC-error diffusion + +// Diffusion weights. We under-correct a bit (15/16th of the error is actually +// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. +#define C1 7 // fraction of error sent to the 4x4 block below +#define C2 8 // fraction of error sent to the 4x4 block on the right +#define DSHIFT 4 +#define DSCALE 1 // storage descaling, needed to make the error fit int8_t + +// Quantize as usual, but also compute and return the quantization error. +// Error is already divided by DSHIFT. +static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { + int V = *v; + const int sign = (V < 0); + if (sign) V = -V; + if (V > (int)mtx->zthresh_[0]) { + const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0]; + const int err = (V - qV); + *v = sign ? -qV : qV; + return (sign ? -err : err) >> DSCALE; + } + *v = 0; + return (sign ? -V : V) >> DSCALE; +} + +static void CorrectDCValues(const VP8EncIterator* const it, + const VP8Matrix* const mtx, + int16_t tmp[][16], VP8ModeScore* const rd) { + // | top[0] | top[1] + // --------+--------+--------- + // left[0] | tmp[0] tmp[1] <-> err0 err1 + // left[1] | tmp[2] tmp[3] err2 err3 + // + // Final errors {err1,err2,err3} are preserved and later restored + // as top[]/left[] on the next block. 
+ int ch; + for (ch = 0; ch <= 1; ++ch) { + const int8_t* const top = it->top_derr_[it->x_][ch]; + const int8_t* const left = it->left_derr_[ch]; + int16_t (* const c)[16] = &tmp[ch * 4]; + int err0, err1, err2, err3; + c[0][0] += (C1 * top[0] + C2 * left[0]) >> (DSHIFT - DSCALE); + err0 = QuantizeSingle(&c[0][0], mtx); + c[1][0] += (C1 * top[1] + C2 * err0) >> (DSHIFT - DSCALE); + err1 = QuantizeSingle(&c[1][0], mtx); + c[2][0] += (C1 * err0 + C2 * left[1]) >> (DSHIFT - DSCALE); + err2 = QuantizeSingle(&c[2][0], mtx); + c[3][0] += (C1 * err1 + C2 * err2) >> (DSHIFT - DSCALE); + err3 = QuantizeSingle(&c[3][0], mtx); + // error 'err' is bounded by mtx->q_[0] which is 132 at max. Hence + // err >> DSCALE will fit in an int8_t type if DSCALE>=1. + assert(abs(err1) <= 127 && abs(err2) <= 127 && abs(err3) <= 127); + rd->derr[ch][0] = (int8_t)err1; + rd->derr[ch][1] = (int8_t)err2; + rd->derr[ch][2] = (int8_t)err3; + } +} + +static void StoreDiffusionErrors(VP8EncIterator* const it, + const VP8ModeScore* const rd) { + int ch; + for (ch = 0; ch <= 1; ++ch) { + int8_t* const top = it->top_derr_[it->x_][ch]; + int8_t* const left = it->left_derr_[ch]; + left[0] = rd->derr[ch][0]; // restore err1 + left[1] = 3 * rd->derr[ch][2] >> 2; // ... 3/4th of err3 + top[0] = rd->derr[ch][1]; // ... err2 + top[1] = rd->derr[ch][2] - left[1]; // ... 1/4th of err3. + } +} + +#undef C1 +#undef C2 +#undef DSHIFT +#undef DSCALE + +//------------------------------------------------------------------------------ + static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; @@ -839,6 +918,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (n = 0; n < 8; n += 2) { VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); } + if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd); + if (DO_TRELLIS_UV && it->do_trellis_) { int ch, x, y; for (ch = 0, n = 0; ch <= 2; ch += 2) { @@ -1101,6 +1182,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { CopyScore(&rd_best, &rd_uv); rd->mode_uv = mode; memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); + if (it->top_derr_ != NULL) { + memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr)); + } SwapPtr(&dst, &tmp_dst); } } @@ -1109,6 +1193,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { if (dst != dst0) { // copy 16x8 block if needed VP8Copy16x8(dst, dst0); } + if (it->top_derr_ != NULL) { // store diffusion errors for next block + StoreDiffusionErrors(it, rd); + } } //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/enc/vp8i_enc.h b/3rdparty/libwebp/src/enc/vp8i_enc.h index 3463491e9d..624e8f8e66 100644 --- a/3rdparty/libwebp/src/enc/vp8i_enc.h +++ b/3rdparty/libwebp/src/enc/vp8i_enc.h @@ -30,9 +30,9 @@ extern "C" { // Various defines and enums // version numbers -#define ENC_MAJ_VERSION 0 -#define ENC_MIN_VERSION 6 -#define ENC_REV_VERSION 1 +#define ENC_MAJ_VERSION 1 +#define ENC_MIN_VERSION 0 +#define ENC_REV_VERSION 0 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -120,6 +120,9 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) { // Uncomment the following to remove token-buffer code: // #define DISABLE_TOKEN_BUFFER +// quality below which error-diffusion is enabled +#define ERROR_DIFFUSION_QUALITY 98 + 
//------------------------------------------------------------------------------ // Headers @@ -201,6 +204,8 @@ typedef struct { score_t i4_penalty_; // penalty for using Intra4 } VP8SegmentInfo; +typedef int8_t DError[2 /* u/v */][2 /* top or left */]; + // Handy transient struct to accumulate score and info during RD-optimization // and mode evaluation. typedef struct { @@ -213,6 +218,7 @@ typedef struct { uint8_t modes_i4[16]; // mode numbers for intra4 predictions int mode_uv; // mode number of chroma prediction uint32_t nz; // non-zero blocks + int8_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3 } VP8ModeScore; // Iterator structure to iterate through macroblocks, pointing to the @@ -242,6 +248,9 @@ typedef struct { int count_down0_; // starting counter value (for progress) int percent0_; // saved initial progress percent + DError left_derr_; // left error diffusion (u/v) + DError *top_derr_; // top diffusion error - NULL if disabled + uint8_t* y_left_; // left luma samples (addressable from index -1 to 15). uint8_t* u_left_; // left u samples (addressable from index -1 to 7) uint8_t* v_left_; // left v samples (addressable from index -1 to 7) @@ -401,6 +410,7 @@ struct VP8Encoder { uint8_t* uv_top_; // top u/v samples. // U and V are packed into 16 bytes (8 U + 8 V) LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off) + DError* top_derr_; // diffusion error (NULL if disabled) }; //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/enc/vp8l_enc.c b/3rdparty/libwebp/src/enc/vp8l_enc.c index 312e521906..a89184eb08 100644 --- a/3rdparty/libwebp/src/enc/vp8l_enc.c +++ b/3rdparty/libwebp/src/enc/vp8l_enc.c @@ -26,8 +26,6 @@ #include "src/utils/utils.h" #include "src/webp/format_constants.h" -#include "src/enc/delta_palettization_enc.h" - // Maximum number of histogram images (sub-blocks). #define MAX_HUFF_IMAGE_SIZE 2600 @@ -259,7 +257,7 @@ static int AnalyzeEntropy(const uint32_t* argb, ++histo[kHistoAlphaPred * 256]; for (j = 0; j < kHistoTotal; ++j) { - entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256, NULL); + entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256); } entropy[kDirect] = entropy_comp[kHistoAlpha] + entropy_comp[kHistoRed] + @@ -384,8 +382,7 @@ static int EncoderAnalyze(VP8LEncoder* const enc, AnalyzeAndCreatePalette(pic, low_effort, enc->palette_, &enc->palette_size_); - // TODO(jyrki): replace the decision to be based on an actual estimate - // of entropy, or even spatial variance of entropy. + // Empirical bit sizes. enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height); enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_); @@ -756,7 +753,6 @@ static WebPEncodingError StoreImageToBitMask( // Don't write the distance with the extra bits code since // the distance can be up to 18 bits of extra bits, and the prefix // 15 bits, totaling to 33, and our PutBits only supports up to 32 bits. - // TODO(jyrki): optimize this further. 
VP8LPrefixEncode(distance, &code, &n_bits, &bits); WriteHuffmanCode(bw, codes + 4, code); VP8LPutBits(bw, bits, n_bits); @@ -1464,49 +1460,6 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, 20 /* quality */, low_effort); } -#ifdef WEBP_EXPERIMENTAL_FEATURES - -static WebPEncodingError EncodeDeltaPalettePredictorImage( - VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality, - int low_effort) { - const WebPPicture* const pic = enc->pic_; - const int width = pic->width; - const int height = pic->height; - - const int pred_bits = 5; - const int transform_width = VP8LSubSampleSize(width, pred_bits); - const int transform_height = VP8LSubSampleSize(height, pred_bits); - const int pred = 7; // default is Predictor7 (Top/Left Average) - const int tiles_per_row = VP8LSubSampleSize(width, pred_bits); - const int tiles_per_col = VP8LSubSampleSize(height, pred_bits); - uint32_t* predictors; - int tile_x, tile_y; - WebPEncodingError err = VP8_ENC_OK; - - predictors = (uint32_t*)WebPSafeMalloc(tiles_per_col * tiles_per_row, - sizeof(*predictors)); - if (predictors == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; - - for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { - for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { - predictors[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); - } - } - - VP8LPutBits(bw, TRANSFORM_PRESENT, 1); - VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); - VP8LPutBits(bw, pred_bits - 2, 3); - err = EncodeImageNoHuffman( - bw, predictors, &enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], // cast const away - (VP8LBackwardRefs*)&enc->refs_[1], - transform_width, transform_height, quality, low_effort); - WebPSafeFree(predictors); - return err; -} - -#endif // WEBP_EXPERIMENTAL_FEATURES - // ----------------------------------------------------------------------------- // VP8LEncoder @@ -1568,7 +1521,7 @@ static int EncodeStreamHook(void* input, void* data2) { WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; const int low_effort = (config->method == 0); -#if (WEBP_NEAR_LOSSLESS == 1) || defined(WEBP_EXPERIMENTAL_FEATURES) +#if (WEBP_NEAR_LOSSLESS == 1) const int width = picture->width; #endif const int height = picture->height; @@ -1627,29 +1580,6 @@ static int EncodeStreamHook(void* input, void* data2) { enc->argb_content_ = kEncoderNone; #endif -#ifdef WEBP_EXPERIMENTAL_FEATURES - if (config->use_delta_palette) { - enc->use_predict_ = 1; - enc->use_cross_color_ = 0; - enc->use_subtract_green_ = 0; - enc->use_palette_ = 1; - if (enc->argb_content_ != kEncoderNearLossless && - enc->argb_content_ != kEncoderPalette) { - err = MakeInputImageCopy(enc); - if (err != VP8_ENC_OK) goto Error; - } - err = WebPSearchOptimalDeltaPalette(enc); - if (err != VP8_ENC_OK) goto Error; - if (enc->use_palette_) { - err = AllocateTransformBuffer(enc, width, height); - if (err != VP8_ENC_OK) goto Error; - err = EncodeDeltaPalettePredictorImage(bw, enc, quality, low_effort); - if (err != VP8_ENC_OK) goto Error; - use_delta_palette = 1; - } - } -#endif // WEBP_EXPERIMENTAL_FEATURES - // Encode palette if (enc->use_palette_) { err = EncodePalette(bw, low_effort, enc); @@ -1822,7 +1752,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, worker_interface->Init(worker); worker->data1 = param; worker->data2 = NULL; - worker->hook = (WebPWorkerHook)EncodeStreamHook; + worker->hook = EncodeStreamHook; } } @@ -1944,7 +1874,6 @@ int VP8LEncodeImage(const WebPConfig* const config, err = VP8LEncodeStream(config, 
picture, &bw, 1 /*use_cache*/); if (err != VP8_ENC_OK) goto Error; - // TODO(skal): have a fine-grained progress report in VP8LEncodeStream(). if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort; // Finish the RIFF chunk. diff --git a/3rdparty/libwebp/src/enc/webp_enc.c b/3rdparty/libwebp/src/enc/webp_enc.c index 283cda8e7b..9f4b10c26c 100644 --- a/3rdparty/libwebp/src/enc/webp_enc.c +++ b/3rdparty/libwebp/src/enc/webp_enc.c @@ -159,12 +159,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, + WEBP_ALIGN_CST; // align all const size_t lf_stats_size = config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0; + const size_t top_derr_size = + (config->quality <= ERROR_DIFFUSION_QUALITY || config->pass > 1) ? + mb_w * sizeof(*enc->top_derr_) : 0; uint8_t* mem; const uint64_t size = (uint64_t)sizeof(*enc) // main struct + WEBP_ALIGN_CST // cache alignment + info_size // modes info + preds_size // prediction modes + samples_size // top/left samples + + top_derr_size // top diffusion error + nz_size // coeff context bits + lf_stats_size; // autofilter stats @@ -175,11 +179,12 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, " info: %ld\n" " preds: %ld\n" " top samples: %ld\n" + " top diffusion: %ld\n" " non-zero: %ld\n" " lf-stats: %ld\n" " total: %ld\n", sizeof(*enc) + WEBP_ALIGN_CST, info_size, - preds_size, samples_size, nz_size, lf_stats_size, size); + preds_size, samples_size, top_derr_size, nz_size, lf_stats_size, size); printf("Transient object sizes:\n" " VP8EncIterator: %ld\n" " VP8ModeScore: %ld\n" @@ -219,6 +224,8 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, enc->y_top_ = mem; enc->uv_top_ = enc->y_top_ + top_stride; mem += 2 * top_stride; + enc->top_derr_ = top_derr_size ? (DError*)mem : NULL; + mem += top_derr_size; assert(mem <= (uint8_t*)enc + size); enc->config_ = config; diff --git a/3rdparty/libwebp/src/mux/muxi.h b/3rdparty/libwebp/src/mux/muxi.h index b73e3fbd7a..6b57eea30f 100644 --- a/3rdparty/libwebp/src/mux/muxi.h +++ b/3rdparty/libwebp/src/mux/muxi.h @@ -26,9 +26,9 @@ extern "C" { //------------------------------------------------------------------------------ // Defines and constants. -#define MUX_MAJ_VERSION 0 -#define MUX_MIN_VERSION 4 -#define MUX_REV_VERSION 1 +#define MUX_MAJ_VERSION 1 +#define MUX_MIN_VERSION 0 +#define MUX_REV_VERSION 0 // Chunk object. 
typedef struct WebPChunk WebPChunk; diff --git a/3rdparty/libwebp/src/utils/endian_inl_utils.h b/3rdparty/libwebp/src/utils/endian_inl_utils.h index 4b2f91dfb8..3630a293bf 100644 --- a/3rdparty/libwebp/src/utils/endian_inl_utils.h +++ b/3rdparty/libwebp/src/utils/endian_inl_utils.h @@ -19,13 +19,6 @@ #include "src/dsp/dsp.h" #include "src/webp/types.h" -// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) -#if !defined(WORDS_BIGENDIAN) && \ - (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) -#define WORDS_BIGENDIAN -#endif - #if defined(WORDS_BIGENDIAN) #define HToLE32 BSwap32 #define HToLE16 BSwap16 From 4091ced5aba3224fa72ffe077b93bc003ec7e298 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 5 Jun 2018 12:40:43 +0300 Subject: [PATCH 07/33] core: set default logger level to 'warning' in Release builds should hide unnecessary 'info' messages --- modules/core/src/logger.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/core/src/logger.cpp b/modules/core/src/logger.cpp index b6c076e2b5..e7c6669242 100644 --- a/modules/core/src/logger.cpp +++ b/modules/core/src/logger.cpp @@ -21,7 +21,13 @@ namespace logging { static LogLevel parseLogLevelConfiguration() { - static cv::String param_log_level = utils::getConfigurationParameterString("OPENCV_LOG_LEVEL", "INFO"); + static cv::String param_log_level = utils::getConfigurationParameterString("OPENCV_LOG_LEVEL", +#if defined NDEBUG + "WARNING" +#else + "INFO" +#endif + ); if (param_log_level == "DISABLED" || param_log_level == "disabled" || param_log_level == "0" || param_log_level == "OFF" || param_log_level == "off") return LOG_LEVEL_SILENT; From caa6915b83b31eb4ae3c5c0b3e7059a29537c442 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 6 Jun 2018 18:23:29 +0300 Subject: [PATCH 08/33] apps: catch() with "const reference" --- apps/interactive-calibration/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/interactive-calibration/main.cpp b/apps/interactive-calibration/main.cpp index 10d3690b89..f3d1e5d66b 100644 --- a/apps/interactive-calibration/main.cpp +++ b/apps/interactive-calibration/main.cpp @@ -217,7 +217,7 @@ int main(int argc, char** argv) (*it)->resetState(); } } - catch (std::runtime_error exp) { + catch (const std::runtime_error& exp) { std::cout << exp.what() << std::endl; } From 9b0bafb82c3cbb6e8afb1c40126a69b5d72c8ef4 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 6 Jun 2018 18:29:05 +0300 Subject: [PATCH 09/33] samples: fix callback function type --- samples/tapi/clahe.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/tapi/clahe.cpp b/samples/tapi/clahe.cpp index b663b020bf..0b7de45a60 100644 --- a/samples/tapi/clahe.cpp +++ b/samples/tapi/clahe.cpp @@ -14,7 +14,7 @@ Ptr pFilter; int tilesize; int cliplimit; -static void TSize_Callback(int pos) +static void TSize_Callback(int pos, void* /*data*/) { if(pos==0) pFilter->setTilesGridSize(Size(1,1)); @@ -22,7 +22,7 @@ static void TSize_Callback(int pos) pFilter->setTilesGridSize(Size(tilesize,tilesize)); } -static void Clip_Callback(int) +static void Clip_Callback(int, void* /*data*/) { pFilter->setClipLimit(cliplimit); } From 2e22f8e79018162fb2c823900b847882ea32c8cf Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Tue, 29 May 2018 20:11:49 +0300 Subject: [PATCH 10/33] Fix for morphologyEx MORPH_HITMISS mode --- modules/imgproc/src/morph.cpp | 10 ++++++---- 
modules/imgproc/test/test_filter.cpp | 6 ++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 0c3a1e4088..6f75e67123 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -2115,16 +2115,18 @@ void cv::morphologyEx( InputArray _src, OutputArray _dst, int op, k2 = (kernel == -1); if (countNonZero(k1) <= 0) - e1 = src; + e1 = Mat(src.size(), src.type(), Scalar(255)); else erode(src, e1, k1, anchor, iterations, borderType, borderValue); - Mat src_complement; - bitwise_not(src, src_complement); if (countNonZero(k2) <= 0) - e2 = src_complement; + e2 = Mat(src.size(), src.type(), Scalar(255)); else + { + Mat src_complement; + bitwise_not(src, src_complement); erode(src_complement, e2, k2, anchor, iterations, borderType, borderValue); + } dst = e1 & e2; } break; diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index 804317800d..e2da595ebe 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -2102,6 +2102,12 @@ TEST(Imgproc_MorphEx, hitmiss_regression_8957) ref.at(1, 1) = 255; ASSERT_DOUBLE_EQ(cvtest::norm(dst, ref, NORM_INF), 0.); + + src.at(1, 1) = 255; + ref.at(0, 1) = 255; + ref.at(2, 1) = 255; + cv::morphologyEx(src, dst, MORPH_HITMISS, kernel); + ASSERT_DOUBLE_EQ(cvtest::norm(dst, ref, NORM_INF), 0.); } TEST(Imgproc_MorphEx, hitmiss_zero_kernel) From f581992a62384eb72efd25cc430a4057e714501e Mon Sep 17 00:00:00 2001 From: "Kyle D. Patterson" Date: Wed, 6 Jun 2018 16:26:11 -0400 Subject: [PATCH 11/33] Update py_calibration.markdown Improved readability by correcting grammar and idioms. Further improved language and readability. Attempted to fix list bullets. Again, attempted to fix list bullets. Removed trailing whitespace on line 8. --- .../py_calibration/py_calibration.markdown | 94 +++++++++---------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown b/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown index cb5d0ad4d6..f56e639005 100644 --- a/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown +++ b/doc/py_tutorials/py_calib3d/py_calibration/py_calibration.markdown @@ -4,32 +4,34 @@ Camera Calibration {#tutorial_py_calibration} Goal ---- -In this section, - - We will learn about distortions in camera, intrinsic and extrinsic parameters of camera etc. - - We will learn to find these parameters, undistort images etc. +In this section, we will learn about + +* types of distortion caused by cameras +* how to find the intrinsic and extrinsic properties of a camera +* how to undistort images based off these properties Basics ------ -Today's cheap pinhole cameras introduces a lot of distortion to images. Two major distortions are +Some pinhole cameras introduce significant distortion to images. Two major kinds of distortion are radial distortion and tangential distortion. -Due to radial distortion, straight lines will appear curved. Its effect is more as we move away from -the center of image. For example, one image is shown below, where two edges of a chess board are -marked with red lines. But you can see that border is not a straight line and doesn't match with the +Radial distortion causes straight lines to appear curved. Radial distortion becomes larger the farther points are from +the center of the image. 
For example, one image is shown below in which two edges of a chess board are +marked with red lines. But, you can see that the border of the chess board is not a straight line and doesn't match with the red line. All the expected straight lines are bulged out. Visit [Distortion (optics)](http://en.wikipedia.org/wiki/Distortion_%28optics%29) for more details. ![image](images/calib_radial.jpg) -This distortion is represented as follows: +Radial distortion can be represented as follows: \f[x_{distorted} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\ y_{distorted} = y( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6)\f] -Similarly, another distortion is the tangential distortion which occurs because image taking lense -is not aligned perfectly parallel to the imaging plane. So some areas in image may look nearer than -expected. It is represented as below: +Similarly, tangential distortion occurs because the image-taking lense +is not aligned perfectly parallel to the imaging plane. So, some areas in the image may look nearer than +expected. The amount of tangential distortion can be represented as below: \f[x_{distorted} = x + [ 2p_1xy + p_2(r^2+2x^2)] \\ y_{distorted} = y + [ p_1(r^2+ 2y^2)+ 2p_2xy]\f] @@ -38,10 +40,9 @@ In short, we need to find five parameters, known as distortion coefficients give \f[Distortion \; coefficients=(k_1 \hspace{10pt} k_2 \hspace{10pt} p_1 \hspace{10pt} p_2 \hspace{10pt} k_3)\f] -In addition to this, we need to find a few more information, like intrinsic and extrinsic parameters -of a camera. Intrinsic parameters are specific to a camera. It includes information like focal -length (\f$f_x,f_y\f$), optical centers (\f$c_x, c_y\f$) etc. It is also called camera matrix. It depends on -the camera only, so once calculated, it can be stored for future purposes. It is expressed as a 3x3 +In addition to this, we need to some other information, like the intrinsic and extrinsic parameters +of the camera. Intrinsic parameters are specific to a camera. They include information like focal +length (\f$f_x,f_y\f$) and optical centers (\f$c_x, c_y\f$). The focal length and optical centers can be used to create a camera matrix, which can be used to remove distortion due to the lenses of a specific camera. The camera matrix is unique to a specific camera, so once calculated, it can be reused on other images taken by the same camera. It is expressed as a 3x3 matrix: \f[camera \; matrix = \left [ \begin{matrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{matrix} \right ]\f] @@ -49,20 +50,16 @@ matrix: Extrinsic parameters corresponds to rotation and translation vectors which translates a coordinates of a 3D point to a coordinate system. -For stereo applications, these distortions need to be corrected first. To find all these parameters, -what we have to do is to provide some sample images of a well defined pattern (eg, chess board). We -find some specific points in it ( square corners in chess board). We know its coordinates in real -world space and we know its coordinates in image. With these data, some mathematical problem is -solved in background to get the distortion coefficients. That is the summary of the whole story. For -better results, we need atleast 10 test patterns. +For stereo applications, these distortions need to be corrected first. To find these parameters, +we must provide some sample images of a well defined pattern (e.g. a chess board). We +find some specific points of which we already know the relative positions (e.g. square corners in the chess board). 
We know the coordinates of these points in real world space and we know the coordinates in the image, so we can solve for the distortion coefficients. For better results, we need at least 10 test patterns. Code ---- -As mentioned above, we need atleast 10 test patterns for camera calibration. OpenCV comes with some -images of chess board (see samples/cpp/left01.jpg -- left14.jpg), so we will utilize it. For sake of -understanding, consider just one image of a chess board. Important input datas needed for camera -calibration is a set of 3D real world points and its corresponding 2D image points. 2D image points +As mentioned above, we need at least 10 test patterns for camera calibration. OpenCV comes with some +images of a chess board (see samples/data/left01.jpg -- left14.jpg), so we will utilize these. Consider an image of a chess board. The important input data needed for calibration of the camera +is the set of 3D real world points and the corresponding 2D coordinates of these points in the image. 2D image points are OK which we can easily find from the image. (These image points are locations where two black squares touch each other in chess boards) @@ -72,7 +69,7 @@ values. But for simplicity, we can say chess board was kept stationary at XY pla and camera was moved accordingly. This consideration helps us to find only X,Y values. Now for X,Y values, we can simply pass the points as (0,0), (1,0), (2,0), ... which denotes the location of points. In this case, the results we get will be in the scale of size of chess board square. But if -we know the square size, (say 30 mm), and we can pass the values as (0,0),(30,0),(60,0),..., we get +we know the square size, (say 30 mm), we can pass the values as (0,0), (30,0), (60,0), ... . Thus, we get the results in mm. (In this case, we don't know square size since we didn't take those images, so we pass in terms of square size). @@ -80,23 +77,22 @@ pass in terms of square size). ### Setup -So to find pattern in chess board, we use the function, **cv.findChessboardCorners()**. We also -need to pass what kind of pattern we are looking, like 8x8 grid, 5x5 grid etc. In this example, we +So to find pattern in chess board, we can use the function, **cv.findChessboardCorners()**. We also +need to pass what kind of pattern we are looking for, like 8x8 grid, 5x5 grid etc. In this example, we use 7x6 grid. (Normally a chess board has 8x8 squares and 7x7 internal corners). It returns the corner points and retval which will be True if pattern is obtained. These corners will be placed in an order (from left-to-right, top-to-bottom) -@sa This function may not be able to find the required pattern in all the images. So one good option +@sa This function may not be able to find the required pattern in all the images. So, one good option is to write the code such that, it starts the camera and check each frame for required pattern. Once -pattern is obtained, find the corners and store it in a list. Also provides some interval before +the pattern is obtained, find the corners and store it in a list. Also, provide some interval before reading next frame so that we can adjust our chess board in different direction. Continue this -process until required number of good patterns are obtained. Even in the example provided here, we -are not sure out of 14 images given, how many are good. So we read all the images and take the good +process until the required number of good patterns are obtained. 
Even in the example provided here, we +are not sure how many images out of the 14 given are good. Thus, we must read all the images and take only the good ones. -@sa Instead of chess board, we can use some circular grid, but then use the function -**cv.findCirclesGrid()** to find the pattern. It is said that less number of images are enough when -using circular grid. +@sa Instead of chess board, we can alternatively use a circular grid. In this case, we must use the function +**cv.findCirclesGrid()** to find the pattern. Fewer images are sufficient to perform camera calibration using a circular grid. Once we find the corners, we can increase their accuracy using **cv.cornerSubPix()**. We can also draw the pattern using **cv.drawChessboardCorners()**. All these steps are included in below code: @@ -146,22 +142,23 @@ One image with pattern drawn on it is shown below: ### Calibration -So now we have our object points and image points we are ready to go for calibration. For that we -use the function, **cv.calibrateCamera()**. It returns the camera matrix, distortion coefficients, +Now that we have our object points and image points, we are ready to go for calibration. We can +use the function, **cv.calibrateCamera()** which returns the camera matrix, distortion coefficients, rotation and translation vectors etc. @code{.py} ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None) @endcode + ### Undistortion -We have got what we were trying. Now we can take an image and undistort it. OpenCV comes with two -methods, we will see both. But before that, we can refine the camera matrix based on a free scaling +Now, we can take an image and undistort it. OpenCV comes with two +methods for doing this. However first, we can refine the camera matrix based on a free scaling parameter using **cv.getOptimalNewCameraMatrix()**. If the scaling parameter alpha=0, it returns undistorted image with minimum unwanted pixels. So it may even remove some pixels at image corners. -If alpha=1, all pixels are retained with some extra black images. It also returns an image ROI which +If alpha=1, all pixels are retained with some extra black images. This function also returns an image ROI which can be used to crop the result. -So we take a new image (left12.jpg in this case. That is the first image in this chapter) +So, we take a new image (left12.jpg in this case. That is the first image in this chapter) @code{.py} img = cv.imread('left12.jpg') h, w = img.shape[:2] @@ -169,7 +166,7 @@ newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h)) @endcode #### 1. Using **cv.undistort()** -This is the shortest path. Just call the function and use ROI obtained above to crop the result. +This is the easiest way. Just call the function and use ROI obtained above to crop the result. @code{.py} # undistort dst = cv.undistort(img, mtx, dist, None, newcameramtx) @@ -181,7 +178,7 @@ cv.imwrite('calibresult.png', dst) @endcode #### 2. Using **remapping** -This is curved path. First find a mapping function from distorted image to undistorted image. Then +This way is a little bit more difficult. First, find a mapping function from the distorted image to the undistorted image. Then use the remap function. @code{.py} # undistort @@ -193,23 +190,22 @@ x, y, w, h = roi dst = dst[y:y+h, x:x+w] cv.imwrite('calibresult.png', dst) @endcode -Both the methods give the same result. See the result below: +Still, both the methods give the same result. 
See the result below: ![image](images/calib_result.jpg) You can see in the result that all the edges are straight. -Now you can store the camera matrix and distortion coefficients using write functions in Numpy +Now you can store the camera matrix and distortion coefficients using write functions in NumPy (np.savez, np.savetxt etc) for future uses. Re-projection Error ------------------- -Re-projection error gives a good estimation of just how exact is the found parameters. This should -be as close to zero as possible. Given the intrinsic, distortion, rotation and translation matrices, -we first transform the object point to image point using **cv.projectPoints()**. Then we calculate +Re-projection error gives a good estimation of just how exact the found parameters are. The closer the re-projection error is to zero, the more accurate the parameters we found are. Given the intrinsic, distortion, rotation and translation matrices, +we must first transform the object point to image point using **cv.projectPoints()**. Then, we can calculate the absolute norm between what we got with our transformation and the corner finding algorithm. To -find the average error we calculate the arithmetical mean of the errors calculate for all the +find the average error, we calculate the arithmetical mean of the errors calculated for all the calibration images. @code{.py} mean_error = 0 From f3a6ae5f00428943ba7cbdfe5abab3cb90e1c237 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 5 Jun 2018 17:18:14 +0300 Subject: [PATCH 12/33] Wrap Inference Engine init to try-catch --- modules/dnn/perf/perf_net.cpp | 2 +- modules/dnn/src/dnn.cpp | 8 +++- modules/dnn/src/op_inf_engine.cpp | 75 ++++++++++++++++++++---------- modules/dnn/test/test_backends.cpp | 2 +- 4 files changed, 59 insertions(+), 28 deletions(-) diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index 8507a21dbb..fff74df130 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -34,7 +34,7 @@ public: void processNet(std::string weights, std::string proto, std::string halide_scheduler, const Mat& input, const std::string& outputLayer = "") { - if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL) + if (backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) { #if defined(HAVE_OPENCL) if (!cv::ocl::useOpenCL()) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 819ad6356a..98d6fdc186 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2252,7 +2252,13 @@ void Net::setPreferableTarget(int targetId) if (IS_DNN_OPENCL_TARGET(targetId)) { #ifndef HAVE_OPENCL - impl->preferableTarget = DNN_TARGET_CPU; +#ifdef HAVE_INF_ENGINE + if (impl->preferableBackend == DNN_BACKEND_OPENCV) +#else + if (impl->preferableBackend == DNN_BACKEND_DEFAULT || + impl->preferableBackend == DNN_BACKEND_OPENCV) +#endif // HAVE_INF_ENGINE + impl->preferableTarget = DNN_TARGET_CPU; #else bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16"); if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16) diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index b03a67d5f3..9481fc347c 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -361,35 +361,60 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net) { CV_Assert(!isInitialized()); - static std::map sharedPlugins; - std::string deviceName = InferenceEngine::getDeviceName(targetDevice); - auto 
pluginIt = sharedPlugins.find(deviceName); - if (pluginIt != sharedPlugins.end()) + try { - enginePtr = pluginIt->second; - } - else - { - enginePtr = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(targetDevice); - sharedPlugins[deviceName] = enginePtr; - } - plugin = InferenceEngine::InferencePlugin(enginePtr); + static std::map sharedPlugins; + std::string deviceName = InferenceEngine::getDeviceName(targetDevice); + auto pluginIt = sharedPlugins.find(deviceName); + if (pluginIt != sharedPlugins.end()) + { + enginePtr = pluginIt->second; + } + else + { + enginePtr = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(targetDevice); + sharedPlugins[deviceName] = enginePtr; - if (targetDevice == InferenceEngine::TargetDevice::eCPU) + if (targetDevice == InferenceEngine::TargetDevice::eCPU) + { + std::string suffixes[] = {"_avx2", "_sse4", ""}; + bool haveFeature[] = { + checkHardwareSupport(CPU_AVX2), + checkHardwareSupport(CPU_SSE4_2), + true + }; + for (int i = 0; i < 3; ++i) + { + if (!haveFeature[i]) + continue; + #ifdef _WIN32 + std::string libName = "cpu_extension" + suffixes[i] + ".dll"; + #else + std::string libName = "libcpu_extension" + suffixes[i] + ".so"; + #endif // _WIN32 + try + { + InferenceEngine::IExtensionPtr extension = + InferenceEngine::make_so_pointer(libName); + enginePtr->AddExtension(extension, 0); + break; + } + catch(...) {} + } + // Some of networks can work without a library of extra layers. + } + } + plugin = InferenceEngine::InferencePlugin(enginePtr); + + netExec = plugin.LoadNetwork(net, {}); + infRequest = netExec.CreateInferRequest(); + infRequest.SetInput(inpBlobs); + infRequest.SetOutput(outBlobs); + } + catch (const std::exception& ex) { -#ifdef _WIN32 - InferenceEngine::IExtensionPtr extension = - InferenceEngine::make_so_pointer("cpu_extension.dll"); -#else - InferenceEngine::IExtensionPtr extension = - InferenceEngine::make_so_pointer("libcpu_extension.so"); -#endif // _WIN32 - plugin.AddExtension(extension); + CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what())); } - netExec = plugin.LoadNetwork(net, {}); - infRequest = netExec.CreateInferRequest(); - infRequest.SetInput(inpBlobs); - infRequest.SetOutput(outBlobs); } bool InfEngineBackendNet::isInitialized() diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 88b8a17958..6f41610fb5 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -40,7 +40,7 @@ public: std::string halideScheduler = "", double l1 = 0.0, double lInf = 0.0) { - if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL) + if (backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) { #ifdef HAVE_OPENCL if (!cv::ocl::useOpenCL()) From e0d28aa893639c3cf0a5a01e475050a7f68c038e Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Thu, 7 Jun 2018 14:16:48 +0300 Subject: [PATCH 13/33] core:ppc Fix java CoreTest/testMahalanobis (#11677) * core:ppc Fix java CoreTest/testMahalanobis * core:ppc Fix warning unused variable on clang --- modules/core/misc/java/test/CoreTest.java | 10 +++++++--- modules/core/src/system.cpp | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/core/misc/java/test/CoreTest.java b/modules/core/misc/java/test/CoreTest.java index c608fb2053..42c343b2ba 100644 --- a/modules/core/misc/java/test/CoreTest.java +++ b/modules/core/misc/java/test/CoreTest.java @@ -947,12 +947,16 @@ public class CoreTest extends 
OpenCVTestCase { } public void testMahalanobis() { + Mat src = new Mat(matSize, matSize, CvType.CV_32F); + Core.randu(src, -128, 128); + Mat covar = new Mat(matSize, matSize, CvType.CV_32F); Mat mean = new Mat(1, matSize, CvType.CV_32F); - Core.calcCovarMatrix(grayRnd_32f, covar, mean, Core.COVAR_ROWS | Core.COVAR_NORMAL, CvType.CV_32F); + Core.calcCovarMatrix(src, covar, mean, Core.COVAR_ROWS | Core.COVAR_NORMAL, CvType.CV_32F); covar = covar.inv(); - Mat line1 = grayRnd_32f.row(0); - Mat line2 = grayRnd_32f.row(1); + + Mat line1 = src.row(0); + Mat line2 = src.row(1); double d = Core.Mahalanobis(line1, line1, covar); diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index e3ee27cf78..49f9cc09ba 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -739,7 +739,6 @@ int64 getCPUTickCount(void) int64 getCPUTickCount(void) { - int64 result = 0; unsigned upper, lower, tmp; __asm__ volatile( "0: \n" From dd7f88bd68f95e56437035cc95d4af482482fcc2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 7 Jun 2018 12:32:48 +0300 Subject: [PATCH 14/33] python: support standalone Python bindings build - requires OpenCV source directory - requires OpenCV binary directory with built modules and 'python_bindings_generator' target --- cmake/OpenCVDetectPython.cmake | 21 ++++++++-- modules/python/CMakeLists.txt | 8 ++++ modules/python/bindings/CMakeLists.txt | 30 ++++++++++++++ modules/python/common.cmake | 28 +++++++++---- modules/python/python2/CMakeLists.txt | 6 --- modules/python/python3/CMakeLists.txt | 6 --- modules/python/standalone.cmake | 57 ++++++++++++++++++++++++++ 7 files changed, 133 insertions(+), 23 deletions(-) create mode 100644 modules/python/standalone.cmake diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake index 6dec76ff66..b6c7a2535c 100644 --- a/cmake/OpenCVDetectPython.cmake +++ b/cmake/OpenCVDetectPython.cmake @@ -27,6 +27,12 @@ function(find_python preferred_version min_version library_env include_dir_env debug_library include_path include_dir include_dir2 packages_path numpy_include_dirs numpy_version) if(NOT ${found}) + if(" ${executable}" STREQUAL " PYTHON_EXECUTABLE") + set(__update_python_vars 0) + else() + set(__update_python_vars 1) + endif() + ocv_check_environment_variables(${executable}) if(${executable}) set(PYTHON_EXECUTABLE "${${executable}}") @@ -47,7 +53,7 @@ if(NOT ${found}) endforeach() endif() - string(REGEX MATCH "^[0-9]+" _preferred_version_major ${preferred_version}) + string(REGEX MATCH "^[0-9]+" _preferred_version_major "${preferred_version}") find_host_package(PythonInterp "${preferred_version}") if(NOT PYTHONINTERP_FOUND) @@ -56,7 +62,7 @@ if(NOT ${found}) if(PYTHONINTERP_FOUND) # Check if python major version is correct - if(${_preferred_version_major} EQUAL ${PYTHON_VERSION_MAJOR}) + if("${_preferred_version_major}" STREQUAL "" OR "${_preferred_version_major}" STREQUAL "${PYTHON_VERSION_MAJOR}") # Copy outputs set(_found ${PYTHONINTERP_FOUND}) set(_executable ${PYTHON_EXECUTABLE}) @@ -65,7 +71,9 @@ if(NOT ${found}) set(_version_minor ${PYTHON_VERSION_MINOR}) set(_version_patch ${PYTHON_VERSION_PATCH}) endif() + endif() + if(__update_python_vars) # Clear find_host_package side effects unset(PYTHONINTERP_FOUND) unset(PYTHON_EXECUTABLE CACHE) @@ -109,7 +117,8 @@ if(NOT ${found}) set(_library_release ${PYTHON_LIBRARY_RELEASE}) set(_include_dir ${PYTHON_INCLUDE_DIR}) set(_include_dir2 ${PYTHON_INCLUDE_DIR2}) - + endif() + if(__update_python_vars) # Clear find_package side effects 
unset(PYTHONLIBS_FOUND) unset(PYTHON_LIBRARIES) @@ -160,7 +169,7 @@ if(NOT ${found}) unset(_path) endif() - set(_numpy_include_dirs ${${numpy_include_dirs}}) + set(_numpy_include_dirs "${${numpy_include_dirs}}") if(NOT _numpy_include_dirs) if(CMAKE_CROSSCOMPILING) @@ -222,6 +231,10 @@ if(NOT ${found}) endif() endfunction(find_python) +if(OPENCV_PYTHON_SKIP_DETECTION) + return() +endif() + find_python(2.7 "${MIN_VER_PYTHON2}" PYTHON2_LIBRARY PYTHON2_INCLUDE_DIR PYTHON2INTERP_FOUND PYTHON2_EXECUTABLE PYTHON2_VERSION_STRING PYTHON2_VERSION_MAJOR PYTHON2_VERSION_MINOR PYTHON2LIBS_FOUND diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index dffe9a794e..bcaa7d957a 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -1,6 +1,7 @@ # ---------------------------------------------------------------------------- # CMake file for python support # ---------------------------------------------------------------------------- +if(DEFINED OPENCV_INITIAL_PASS) # OpenCV build add_subdirectory(bindings) @@ -28,3 +29,10 @@ endif() add_subdirectory(python2) add_subdirectory(python3) + +else() # standalone build + +cmake_minimum_required(VERSION 2.8.12) +include("./standalone.cmake") + +endif() diff --git a/modules/python/bindings/CMakeLists.txt b/modules/python/bindings/CMakeLists.txt index 73c67aa3ce..f7c86e0250 100644 --- a/modules/python/bindings/CMakeLists.txt +++ b/modules/python/bindings/CMakeLists.txt @@ -37,6 +37,7 @@ ocv_list_filterout(opencv_hdrs "modules/.+/utils/.*") ocv_list_filterout(opencv_hdrs "modules/.*\\\\.inl\\\\.h*") ocv_list_filterout(opencv_hdrs "modules/.*_inl\\\\.h*") ocv_list_filterout(opencv_hdrs "modules/.*\\\\.details\\\\.h*") +ocv_list_filterout(opencv_hdrs "modules/.*\\\\.private\\\\.h*") ocv_list_filterout(opencv_hdrs "modules/.*/detection_based_tracker\\\\.hpp") # Conditional compilation set(cv2_generated_files @@ -78,3 +79,32 @@ else() file(WRITE "${cv2_custom_hdr}" "${cv2_custom_hdr_str}") endif() unset(__content) + + +# +# Configuration for standalone build of Python bindings +# +set(PYTHON_CONFIG_SCRIPT "") +ocv_cmake_script_append_var(PYTHON_CONFIG_SCRIPT + CMAKE_BUILD_TYPE + BUILD_SHARED_LIBS + + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + + CV_GCC CV_CLANG ENABLE_NOISY_WARNINGS + + CMAKE_MODULE_LINKER_FLAGS + CMAKE_INSTALL_PREFIX + + OpenCV_SOURCE_DIR + + OPENCV_FORCE_PYTHON_LIBS + OPENCV_PYTHON_SKIP_LINKER_EXCLUDE_LIBS + + OPENCV_PYTHON_BINDINGS_DIR + cv2_custom_hdr + cv2_generated_files +) +set(CMAKE_HELPER_SCRIPT "${CMAKE_BINARY_DIR}/opencv_python_config.cmake") +file(GENERATE OUTPUT "${CMAKE_HELPER_SCRIPT}" CONTENT "${PYTHON_CONFIG_SCRIPT}") diff --git a/modules/python/common.cmake b/modules/python/common.cmake index 80cd2b6fc3..cbb79b8c44 100644 --- a/modules/python/common.cmake +++ b/modules/python/common.cmake @@ -1,5 +1,5 @@ # This file is included from a subdirectory -set(PYTHON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../") +set(PYTHON_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}") ocv_add_module(${MODULE_NAME} BINDINGS PRIVATE_REQUIRED opencv_python_bindings_generator) @@ -20,7 +20,9 @@ if(NOT WIN32 AND NOT APPLE AND NOT OPENCV_PYTHON_SKIP_LINKER_EXCLUDE_LIBS) endif() ocv_add_library(${the_module} MODULE ${PYTHON_SOURCE_DIR}/src2/cv2.cpp ${cv2_generated_hdrs} ${opencv_userdef_hdrs} ${cv2_custom_hdr}) -add_dependencies(${the_module} gen_opencv_python_source) +if(TARGET gen_opencv_python_source) + add_dependencies(${the_module} 
gen_opencv_python_source) +endif() if(APPLE) set_target_properties(${the_module} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") @@ -32,8 +34,10 @@ elseif(WIN32 OR OPENCV_FORCE_PYTHON_LIBS) endif() endif() -set(deps ${OPENCV_MODULE_${the_module}_DEPS}) -list(REMOVE_ITEM deps opencv_python_bindings_generator) # don't add dummy module to target_link_libraries list +if(TARGET gen_opencv_python_source) + set(deps ${OPENCV_MODULE_${the_module}_DEPS}) + list(REMOVE_ITEM deps opencv_python_bindings_generator) # don't add dummy module to target_link_libraries list +endif() ocv_target_link_libraries(${the_module} LINK_PRIVATE ${deps}) if(DEFINED ${PYTHON}_CVPY_SUFFIX) @@ -75,8 +79,16 @@ if(MSVC AND NOT ENABLE_NOISY_WARNINGS) string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() -ocv_warnings_disable(CMAKE_CXX_FLAGS -Woverloaded-virtual -Wunused-private-field) -ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef) # accurate guard via #pragma doesn't work (C++ preprocessor doesn't handle #pragma) + +if(MSVC) + ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4996) +else() + ocv_warnings_disable(CMAKE_CXX_FLAGS + -Wdeprecated-declarations + -Woverloaded-virtual -Wunused-private-field + -Wundef # accurate guard via #pragma doesn't work (C++ preprocessor doesn't handle #pragma) + ) +endif() if(MSVC AND NOT BUILD_SHARED_LIBS) set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") @@ -94,7 +106,9 @@ else() set(PYTHON_INSTALL_ARCHIVE ARCHIVE DESTINATION ${${PYTHON}_PACKAGES_PATH} COMPONENT python) endif() -if(NOT INSTALL_CREATE_DISTRIB AND DEFINED ${PYTHON}_PACKAGES_PATH) +if(DEFINED OPENCV_${PYTHON}_INSTALL_PATH) + set(__dst "${OPENCV_${PYTHON}_INSTALL_PATH}") +elseif(NOT INSTALL_CREATE_DISTRIB AND DEFINED ${PYTHON}_PACKAGES_PATH) set(__dst "${${PYTHON}_PACKAGES_PATH}") endif() if(NOT __dst) diff --git a/modules/python/python2/CMakeLists.txt b/modules/python/python2/CMakeLists.txt index bf55ef834a..37e20fe330 100644 --- a/modules/python/python2/CMakeLists.txt +++ b/modules/python/python2/CMakeLists.txt @@ -13,9 +13,3 @@ include(../common.cmake) unset(MODULE_NAME) unset(MODULE_INSTALL_SUBDIR) - -if(MSVC) - ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4996) -else() - ocv_warnings_disable(CMAKE_CXX_FLAGS -Wdeprecated-declarations) -endif() diff --git a/modules/python/python3/CMakeLists.txt b/modules/python/python3/CMakeLists.txt index b3a725397c..da86ba5c5e 100644 --- a/modules/python/python3/CMakeLists.txt +++ b/modules/python/python3/CMakeLists.txt @@ -12,9 +12,3 @@ include(../common.cmake) unset(MODULE_NAME) unset(MODULE_INSTALL_SUBDIR) - -if(MSVC) - ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4996) -else() - ocv_warnings_disable(CMAKE_CXX_FLAGS -Wdeprecated-declarations) -endif() diff --git a/modules/python/standalone.cmake b/modules/python/standalone.cmake new file mode 100644 index 0000000000..d21420a662 --- /dev/null +++ b/modules/python/standalone.cmake @@ -0,0 +1,57 @@ +if(NOT DEFINED OpenCV_BINARY_DIR) + message(FATAL_ERROR "Define OpenCV_BINARY_DIR") +endif() +include("${OpenCV_BINARY_DIR}/opencv_python_config.cmake") +if(NOT DEFINED OpenCV_SOURCE_DIR) + message(FATAL_ERROR "Missing define of OpenCV_SOURCE_DIR") +endif() + +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVUtils.cmake") + +set(OPENCV_PYTHON_SKIP_DETECTION ON) +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectPython.cmake") +find_python("" "2.7" PYTHON_LIBRARY PYTHON_INCLUDE_DIR + PYTHONINTERP_FOUND PYTHON_EXECUTABLE PYTHON_VERSION_STRING + PYTHON_VERSION_MAJOR 
PYTHON_VERSION_MINOR PYTHONLIBS_FOUND + PYTHONLIBS_VERSION_STRING PYTHON_LIBRARIES PYTHON_LIBRARY + PYTHON_DEBUG_LIBRARIES PYTHON_LIBRARY_DEBUG PYTHON_INCLUDE_PATH + PYTHON_INCLUDE_DIR PYTHON_INCLUDE_DIR2 PYTHON_PACKAGES_PATH + PYTHON_NUMPY_INCLUDE_DIRS PYTHON_NUMPY_VERSION) +if(NOT PYTHON_EXECUTABLE OR NOT PYTHON_INCLUDE_DIR) + message(FATAL_ERROR "Can't find Python development files") +endif() +if(NOT PYTHON_NUMPY_INCLUDE_DIRS) + message(FATAL_ERROR "Can't find Python 'numpy' development files") +endif() + +status("-----------------------------------------------------------------") +status(" Python:") +status(" Interpreter:" "${PYTHON_EXECUTABLE} (ver ${PYTHON_VERSION_STRING})") +status(" Libraries:" "${PYTHON_LIBRARIES} (ver ${PYTHONLIBS_VERSION_STRING})") +status(" numpy:" "${PYTHON_NUMPY_INCLUDE_DIRS} (ver ${PYTHON_NUMPY_VERSION})") +status("") +status(" Install to:" "${CMAKE_INSTALL_PREFIX}") +status("-----------------------------------------------------------------") + +set(OpenCV_DIR "${OpenCV_BINARY_DIR}") +find_package(OpenCV REQUIRED) + +set(PYTHON PYTHON) + +macro(ocv_add_module module_name) + set(the_module opencv_${module_name}) + project(${the_module} CXX) +endmacro() + +macro(ocv_module_include_directories module) + include_directories(${ARGN}) +endmacro() + +set(MODULE_NAME python) +set(MODULE_INSTALL_SUBDIR "") +set(LIBRARY_OUTPUT_PATH "${CMAKE_BINARY_DIR}/lib") +set(deps ${OpenCV_LIBRARIES}) +include("${CMAKE_CURRENT_LIST_DIR}/common.cmake") # generate python target + +# done, cleanup +unset(OPENCV_BUILD_INFO_STR CACHE) # remove from cache From 7175f257b513fd1e45d108dda2d1f56575d839e8 Mon Sep 17 00:00:00 2001 From: David <34099314+dmonterom@users.noreply.github.com> Date: Thu, 7 Jun 2018 15:29:04 +0200 Subject: [PATCH 15/33] Added ResizeBilinear op for tf (#11050) * Added ResizeBilinear op for tf Combined ResizeNearestNeighbor and ResizeBilinear layers into Resize (with an interpolation param). Minor changes to tf_importer and resize layer to save some code lines Minor changes in init.cpp Minor changes in tf_importer.cpp * Replaced implementation of a custom ResizeBilinear layer to all layers * Use Mat::ptr. Replace interpolation flags --- .../dnn/include/opencv2/dnn/all_layers.hpp | 8 +- modules/dnn/perf/perf_net.cpp | 8 + modules/dnn/src/darknet/darknet_io.cpp | 3 +- modules/dnn/src/init.cpp | 2 +- .../dnn/src/layers/crop_and_resize_layer.cpp | 2 +- modules/dnn/src/layers/resize_layer.cpp | 176 +++++++++++++++++ .../layers/resize_nearest_neighbor_layer.cpp | 117 ----------- modules/dnn/src/tensorflow/tf_importer.cpp | 30 ++- modules/dnn/test/test_tf_importer.cpp | 186 ++++-------------- samples/dnn/text_detection.cpp | 5 - 10 files changed, 253 insertions(+), 284 deletions(-) create mode 100644 modules/dnn/src/layers/resize_layer.cpp delete mode 100644 modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index cc8521586c..55b85a0b56 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -565,14 +565,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN }; /** - * @brief Resize input 4-dimensional blob by nearest neighbor strategy. + * @brief Resize input 4-dimensional blob by nearest neighbor or bilinear strategy. * - * Layer is used to support TensorFlow's resize_nearest_neighbor op. + * Layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops. 
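     *
     * The layer is configured through LayerParams: "interpolation" ("nearest" or
     * "bilinear") and either an explicit output "width" / "height" or a
     * "zoom_factor" ("zoom_factor_x" / "zoom_factor_y").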
*/ - class CV_EXPORTS ResizeNearestNeighborLayer : public Layer + class CV_EXPORTS ResizeLayer : public Layer { public: - static Ptr create(const LayerParams& params); + static Ptr create(const LayerParams& params); }; class CV_EXPORTS ProposalLayer : public Layer diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index fff74df130..8e777f8ae5 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -236,6 +236,14 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3) processNet("dnn/yolov3.cfg", "dnn/yolov3.weights", "", inp / 255); } +PERF_TEST_P_(DNNTestNetwork, EAST_text_detection) +{ + if (backend == DNN_BACKEND_HALIDE || + backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) + throw SkipTestException(""); + processNet("dnn/frozen_east_text_detection.pb", "", "", Mat(cv::Size(320, 320), CV_32FC3)); +} + const tuple testCases[] = { #ifdef HAVE_HALIDE tuple(DNN_BACKEND_HALIDE, DNN_TARGET_CPU), diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 707cc29095..91ebb0fa8b 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -395,9 +395,10 @@ namespace cv { { cv::dnn::LayerParams param; param.name = "Upsample-name"; - param.type = "ResizeNearestNeighbor"; + param.type = "Resize"; param.set("zoom_factor", scaleFactor); + param.set("interpolation", "nearest"); darknet::LayerParameter lp; std::string layer_name = cv::format("upsample_%d", layer_id); diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 2bff16c4eb..e5c3a279e5 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -83,7 +83,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Concat, ConcatLayer); CV_DNN_REGISTER_LAYER_CLASS(Reshape, ReshapeLayer); CV_DNN_REGISTER_LAYER_CLASS(Flatten, FlattenLayer); - CV_DNN_REGISTER_LAYER_CLASS(ResizeNearestNeighbor, ResizeNearestNeighborLayer); + CV_DNN_REGISTER_LAYER_CLASS(Resize, ResizeLayer); CV_DNN_REGISTER_LAYER_CLASS(CropAndResize, CropAndResizeLayer); CV_DNN_REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer); diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index 3f92a8488d..a9bca1f04b 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -68,7 +68,7 @@ public: { float input_y = top * (inpHeight - 1) + y * heightScale; int y0 = static_cast(input_y); - const float* inpData_row0 = (float*)inp.data + y0 * inpWidth; + const float* inpData_row0 = inp.ptr(0, 0, y0); const float* inpData_row1 = (y0 + 1 < inpHeight) ? (inpData_row0 + inpWidth) : inpData_row0; for (int x = 0; x < outWidth; ++x) { diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp new file mode 100644 index 0000000000..82bc6542be --- /dev/null +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -0,0 +1,176 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2017, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. 
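+
+// Implements the Resize layer: resizes an NCHW blob using either nearest neighbor
+// or bilinear interpolation (used, in particular, for TensorFlow's
+// ResizeNearestNeighbor and ResizeBilinear ops).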
+#include "../precomp.hpp" +#include "layers_common.hpp" +#include "../op_inf_engine.hpp" +#include + +namespace cv { namespace dnn { + +class ResizeLayerImpl CV_FINAL : public ResizeLayer +{ +public: + ResizeLayerImpl(const LayerParams& params) + { + setParamsFrom(params); + outWidth = params.get("width", 0); + outHeight = params.get("height", 0); + if (params.has("zoom_factor")) + { + CV_Assert(!params.has("zoom_factor_x") && !params.has("zoom_factor_y")); + zoomFactorWidth = zoomFactorHeight = params.get("zoom_factor"); + } + else if (params.has("zoom_factor_x") || params.has("zoom_factor_y")) + { + CV_Assert(params.has("zoom_factor_x") && params.has("zoom_factor_y")); + zoomFactorWidth = params.get("zoom_factor_x"); + zoomFactorHeight = params.get("zoom_factor_y"); + } + interpolation = params.get("interpolation"); + CV_Assert(interpolation == "nearest" || interpolation == "bilinear"); + + alignCorners = params.get("align_corners", false); + if (alignCorners) + CV_Error(Error::StsNotImplemented, "Resize with align_corners=true is not implemented"); + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() == 1, inputs[0].size() == 4); + outputs.resize(1, inputs[0]); + outputs[0][2] = outHeight > 0 ? outHeight : (outputs[0][2] * zoomFactorHeight); + outputs[0][3] = outWidth > 0 ? outWidth : (outputs[0][3] * zoomFactorWidth); + // We can work in-place (do nothing) if input shape == output shape. + return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]); + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && interpolation == "nearest"; + } + + virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE + { + if (!outWidth && !outHeight) + { + outHeight = outputs[0].size[2]; + outWidth = outputs[0].size[3]; + } + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); + } + + void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (outHeight == inputs[0]->size[2] && outWidth == inputs[0]->size[3]) + return; + + Mat& inp = *inputs[0]; + Mat& out = outputs[0]; + if (interpolation == "nearest") + { + for (size_t n = 0; n < inputs[0]->size[0]; ++n) + { + for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch) + { + resize(getPlane(inp, n, ch), getPlane(out, n, ch), + Size(outWidth, outHeight), 0, 0, INTER_NEAREST); + } + } + } + else if (interpolation == "bilinear") + { + const int inpHeight = inp.size[2]; + const int inpWidth = inp.size[3]; + const int inpSpatialSize = inpHeight * inpWidth; + const int outSpatialSize = outHeight * outWidth; + const float heightScale = static_cast(inpHeight) / (outHeight); + const float widthScale = static_cast(inpWidth) / (outWidth); + const int numPlanes = inp.size[0] * inp.size[1]; + CV_Assert(inp.isContinuous(), out.isContinuous()); + + Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight); + Mat outPlanes = out.reshape(1, numPlanes * outHeight); + for (int y = 0; y < outHeight; ++y) + { + float input_y = y * heightScale; + int y0 = 
static_cast(input_y); + const float* inpData_row0 = inpPlanes.ptr(y0); + const float* inpData_row1 = inpPlanes.ptr(std::min(y0 + 1, inpHeight - 1)); + for (int x = 0; x < outWidth; ++x) + { + float input_x = x * widthScale; + int x0 = static_cast(input_x); + int x1 = std::min(x0 + 1, inpWidth - 1); + + float* outData = outPlanes.ptr(y, x); + const float* inpData_row0_c = inpData_row0; + const float* inpData_row1_c = inpData_row1; + for (int c = 0; c < numPlanes; ++c) + { + *outData = inpData_row0_c[x0] + + (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) + + (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] + + (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0])); + + inpData_row0_c += inpSpatialSize; + inpData_row1_c += inpSpatialSize; + outData += outSpatialSize; + } + } + } + } + else + CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation); + } + + virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE + { +#ifdef HAVE_INF_ENGINE + InferenceEngine::LayerParams lp; + lp.name = name; + lp.type = "Resample"; + lp.precision = InferenceEngine::Precision::FP32; + + std::shared_ptr ieLayer(new InferenceEngine::CNNLayer(lp)); + ieLayer->params["type"] = "caffe.ResampleParameter.NEAREST"; + ieLayer->params["antialias"] = "0"; + ieLayer->params["width"] = cv::format("%d", outWidth); + ieLayer->params["height"] = cv::format("%d", outHeight); + + return Ptr(new InfEngineBackendNode(ieLayer)); +#endif // HAVE_INF_ENGINE + return Ptr(); + } + +private: + int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight; + String interpolation; + bool alignCorners; +}; + + +Ptr ResizeLayer::create(const LayerParams& params) +{ + return Ptr(new ResizeLayerImpl(params)); +} + +} // namespace dnn +} // namespace cv diff --git a/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp b/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp deleted file mode 100644 index 703b7a438f..0000000000 --- a/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -// Copyright (C) 2017, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -#include "../precomp.hpp" -#include "layers_common.hpp" -#include "../op_inf_engine.hpp" -#include - -namespace cv { namespace dnn { - -class ResizeNearestNeighborLayerImpl CV_FINAL : public ResizeNearestNeighborLayer -{ -public: - ResizeNearestNeighborLayerImpl(const LayerParams& params) - { - setParamsFrom(params); - CV_Assert(params.has("width") && params.has("height") || params.has("zoom_factor")); - CV_Assert(!params.has("width") && !params.has("height") || !params.has("zoom_factor")); - outWidth = params.get("width", 0); - outHeight = params.get("height", 0); - zoomFactor = params.get("zoom_factor", 1); - alignCorners = params.get("align_corners", false); - if (alignCorners) - CV_Error(Error::StsNotImplemented, "Nearest neighborhood resize with align_corners=true is not implemented"); - } - - bool getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const CV_OVERRIDE - { - CV_Assert(inputs.size() == 1, inputs[0].size() == 4); - outputs.resize(1, inputs[0]); - outputs[0][2] = outHeight > 0 ? 
outHeight : (outputs[0][2] * zoomFactor); - outputs[0][3] = outWidth > 0 ? outWidth : (outputs[0][3] * zoomFactor); - // We can work in-place (do nothing) if input shape == output shape. - return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]); - } - - virtual bool supportBackend(int backendId) CV_OVERRIDE - { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); - } - - virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE - { - if (!outWidth && !outHeight) - { - outHeight = outputs[0].size[2]; - outWidth = outputs[0].size[3]; - } - } - - void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); - } - - void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - if (outHeight == inputs[0]->size[2] && outWidth == inputs[0]->size[3]) - return; - - Mat& inp = *inputs[0]; - Mat& out = outputs[0]; - for (size_t n = 0; n < inputs[0]->size[0]; ++n) - { - for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch) - { - resize(getPlane(inp, n, ch), getPlane(out, n, ch), - Size(outWidth, outHeight), 0, 0, INTER_NEAREST); - } - } - } - - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { -#ifdef HAVE_INF_ENGINE - InferenceEngine::LayerParams lp; - lp.name = name; - lp.type = "Resample"; - lp.precision = InferenceEngine::Precision::FP32; - - std::shared_ptr ieLayer(new InferenceEngine::CNNLayer(lp)); - ieLayer->params["type"] = "caffe.ResampleParameter.NEAREST"; - ieLayer->params["antialias"] = "0"; - ieLayer->params["width"] = cv::format("%d", outWidth); - ieLayer->params["height"] = cv::format("%d", outHeight); - - return Ptr(new InfEngineBackendNode(ieLayer)); -#endif // HAVE_INF_ENGINE - return Ptr(); - } - -private: - int outWidth, outHeight, zoomFactor; - bool alignCorners; -}; - - -Ptr ResizeNearestNeighborLayer::create(const LayerParams& params) -{ - return Ptr(new ResizeNearestNeighborLayerImpl(params)); -} - -} // namespace dnn -} // namespace cv diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index f19daf9cc6..4bff84175d 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1450,18 +1450,36 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); data_layouts[name] = DATA_LAYOUT_UNKNOWN; } - else if (type == "ResizeNearestNeighbor") + else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear") { - Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2); + if (layer.input_size() == 2) + { + Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2); + layerParams.set("height", outSize.at(0, 0)); + layerParams.set("width", outSize.at(0, 1)); + } + else if (layer.input_size() == 3) + { + Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_Assert(factorHeight.type() == CV_32SC1, factorHeight.total() == 1, + factorWidth.type() == CV_32SC1, factorWidth.total() == 1); + 
layerParams.set("zoom_factor_x", factorWidth.at(0)); + layerParams.set("zoom_factor_y", factorHeight.at(0)); + } + else + CV_Assert(layer.input_size() == 2 || layer.input_size() == 3); - layerParams.set("height", outSize.at(0, 0)); - layerParams.set("width", outSize.at(0, 1)); + if (type == "ResizeNearestNeighbor") + layerParams.set("interpolation", "nearest"); + else + layerParams.set("interpolation", "bilinear"); if (hasLayerAttr(layer, "align_corners")) layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b()); - int id = dstNet.addLayer(name, "ResizeNearestNeighbor", layerParams); + int id = dstNet.addLayer(name, "Resize", layerParams); layer_id[name] = id; connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 2690d7242b..4f024114ef 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -317,6 +317,43 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) normAssertDetections(ref, out, "", 0.9, 3.4e-3, 1e-2); } +// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png') +// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3) +// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'), +// sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')], +// feed_dict={'input_images:0': inp}) +// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2)) +// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2)) +// np.save('east_text_detection.scores.npy', scores) +// np.save('east_text_detection.geometry.npy', geometry) +TEST_P(Test_TensorFlow_nets, EAST_text_detection) +{ + std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false); + std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false); + std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false); + std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false); + + Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false)); + + net.setPreferableTarget(GetParam()); + + Mat img = imread(imgPath); + Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false); + net.setInput(inp); + + std::vector outs; + std::vector outNames(2); + outNames[0] = "feature_fusion/Conv_7/Sigmoid"; + outNames[1] = "feature_fusion/concat_3"; + net.forward(outs, outNames); + + Mat scores = outs[0]; + Mat geometry = outs[1]; + + normAssert(scores, blobFromNPY(refScoresPath), "scores"); + normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets()); typedef testing::TestWithParam Test_TensorFlow_fp16; @@ -396,159 +433,10 @@ TEST(Test_TensorFlow, memory_read) runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true); } -// Test a custom layer. 
-class ResizeBilinearLayer CV_FINAL : public Layer -{ -public: - ResizeBilinearLayer(const LayerParams ¶ms) : Layer(params), - outWidth(0), outHeight(0), factorWidth(1), factorHeight(1) - { - CV_Assert(!params.get("align_corners", false)); - CV_Assert(!blobs.empty()); - - for (size_t i = 0; i < blobs.size(); ++i) - CV_Assert(blobs[i].type() == CV_32SC1); - - if (blobs.size() == 1) - { - CV_Assert(blobs[0].total() == 2); - outHeight = blobs[0].at(0, 0); - outWidth = blobs[0].at(0, 1); - } - else - { - CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1); - factorHeight = blobs[0].at(0, 0); - factorWidth = blobs[1].at(0, 0); - outHeight = outWidth = 0; - } - } - - static Ptr create(LayerParams& params) - { - return Ptr(new ResizeBilinearLayer(params)); - } - - virtual bool getMemoryShapes(const std::vector > &inputs, - const int requiredOutputs, - std::vector > &outputs, - std::vector > &internals) const CV_OVERRIDE - { - std::vector outShape(4); - outShape[0] = inputs[0][0]; // batch size - outShape[1] = inputs[0][1]; // number of channels - outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight); - outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth); - outputs.assign(1, outShape); - return false; - } - - virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE - { - if (!outWidth && !outHeight) - { - outHeight = outputs[0].size[2]; - outWidth = outputs[0].size[3]; - } - } - - // This implementation is based on a reference implementation from - // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h - virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE - { - Mat& inp = *inputs[0]; - Mat& out = outputs[0]; - const float* inpData = (float*)inp.data; - float* outData = (float*)out.data; - - const int batchSize = inp.size[0]; - const int numChannels = inp.size[1]; - const int inpHeight = inp.size[2]; - const int inpWidth = inp.size[3]; - - float heightScale = static_cast(inpHeight) / outHeight; - float widthScale = static_cast(inpWidth) / outWidth; - for (int b = 0; b < batchSize; ++b) - { - for (int y = 0; y < outHeight; ++y) - { - float input_y = y * heightScale; - int y0 = static_cast(std::floor(input_y)); - int y1 = std::min(y0 + 1, inpHeight - 1); - for (int x = 0; x < outWidth; ++x) - { - float input_x = x * widthScale; - int x0 = static_cast(std::floor(input_x)); - int x1 = std::min(x0 + 1, inpWidth - 1); - for (int c = 0; c < numChannels; ++c) - { - float interpolation = - inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) + - inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) + - inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) + - inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0); - outData[offset(out.size, c, x, y, b)] = interpolation; - } - } - } - } - } - - virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} - -private: - static inline int offset(const MatSize& size, int c, int x, int y, int b) - { - return x + size[3] * (y + size[2] * (c + size[1] * b)); - } - - int outWidth, outHeight, factorWidth, factorHeight; -}; - TEST(Test_TensorFlow, resize_bilinear) { - CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); runTensorFlowNet("resize_bilinear"); runTensorFlowNet("resize_bilinear_factor"); - 
LayerFactory::unregisterLayer("ResizeBilinear"); -} - -// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png') -// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3) -// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'), -// sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')], -// feed_dict={'input_images:0': inp}) -// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2)) -// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2)) -// np.save('east_text_detection.scores.npy', scores) -// np.save('east_text_detection.geometry.npy', geometry) -TEST(Test_TensorFlow, EAST_text_detection) -{ - CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); - std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false); - std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false); - std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false); - std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false); - - Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false)); - net.setPreferableBackend(DNN_BACKEND_OPENCV); - - Mat img = imread(imgPath); - Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false); - net.setInput(inp); - - std::vector outs; - std::vector outNames(2); - outNames[0] = "feature_fusion/Conv_7/Sigmoid"; - outNames[1] = "feature_fusion/concat_3"; - net.forward(outs, outNames); - - Mat scores = outs[0]; - Mat geometry = outs[1]; - - normAssert(scores, blobFromNPY(refScoresPath), "scores"); - normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3); - LayerFactory::unregisterLayer("ResizeBilinear"); } } diff --git a/samples/dnn/text_detection.cpp b/samples/dnn/text_detection.cpp index 5abe6b6884..f69d13f124 100644 --- a/samples/dnn/text_detection.cpp +++ b/samples/dnn/text_detection.cpp @@ -2,8 +2,6 @@ #include #include -#include "custom_layers.hpp" - using namespace cv; using namespace cv::dnn; @@ -38,9 +36,6 @@ int main(int argc, char** argv) CV_Assert(parser.has("model")); String model = parser.get("model"); - // Register a custom layer. - CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); - // Load network. 
Net net = readNet(model); From 1187a7fa345b98b49d366fb6f8857486168f636f Mon Sep 17 00:00:00 2001 From: rockzhan Date: Thu, 7 Jun 2018 21:45:54 +0800 Subject: [PATCH 16/33] Merge pull request #11649 from rockzhan:dnn_dw_prelu dnn: Fix output mismatch when forward dnn model contain [depthwise conv(group=1) + bn + prelu] (#11649) * this can make sure [depthwise conv(group=1) + bn + prelu] output not shift * add TEST to show the output mismatch in [DWconv+Prelu] * fix typo * change loading image to init cvMat directly * build runtime model, without loading external model * remove whitespace * change way to create a cvmat * add bias_term, add target output * fix [dwconv + prelu] value mismatch when no optimizations * fix Test error when change output channels * add parametric test * change num_output to group value * change conv code and change test back --- modules/dnn/src/layers/convolution_layer.cpp | 5 +- modules/dnn/src/layers/layers_common.simd.hpp | 10 +- modules/dnn/test/test_layers.cpp | 119 ++++++++++++++++++ 3 files changed, 129 insertions(+), 5 deletions(-) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index cb1ab51877..5eac3e90f3 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -737,8 +737,9 @@ public: if( relu ) { - r0 = relu[i]; - r1 = relu[i+1]; + r0 = relu[i]; r1 = relu[i+1]; + if( i+1 >= outCn ) + r1 = r0; } int j = 0; diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp index bee3e912e1..b2c0aa6a29 100644 --- a/modules/dnn/src/layers/layers_common.simd.hpp +++ b/modules/dnn/src/layers/layers_common.simd.hpp @@ -101,9 +101,13 @@ void fastConv( const float* weights, size_t wstep, const float* bias, if( relu ) { - r0 = relu[i]; - r1 = relu[i+1]; - r2 = relu[i+2]; + r0 = relu[i]; r1 = relu[i+1]; r2 = relu[i+2]; + if( i+2 >= outCn ) + { + r2 = r1; + if( i+1 >= outCn ) + r2 = r1 = r0; + } vr0 = _mm_set1_ps(r0); vr1 = _mm_set1_ps(r1); vr2 = _mm_set1_ps(r2); diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 8a17d7a026..da7dd775a4 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -786,6 +786,125 @@ TEST(Layer_PriorBox, squares) normAssert(out.reshape(1, 4), target); } +typedef TestWithParam > Layer_Test_DWconv_Prelu; +TEST_P(Layer_Test_DWconv_Prelu, Accuracy) +{ + // Test case + // input img size 3x16x16 value all 1 + // | + // v + // dw_conv weight[0]=-1 weight[1]=-2 weight[2]=-3 bias={1,2,3} + // | + // v + // prelu weight={1,2,3} + // | + // v + // output out size 3x14x14 if right: out[0]=-8 out[0]=-32 out[0]=-72 + // but current opencv output: out[0]=-24 out[0]=-48 out[0]=-72 + + const int num_input = get<0>(GetParam()); //inpChannels + const int group = 3; //outChannels=group when group>1 + const int num_output = get<1>(GetParam()); + const int kernel_depth = num_input/group; + CV_Assert(num_output >= group, num_output % group == 0, num_input % group == 0); + + Net net; + //layer 1: dwconv + LayerParams lp; + lp.name = "dwconv"; + lp.type = "Convolution"; + lp.set("kernel_size", 3); + lp.set("num_output", num_output); + lp.set("pad", 0); + lp.set("group", group); + lp.set("stride", 1); + lp.set("engine", "CAFFE"); + lp.set("bias_term", "true"); + + std::vector weightsShape(4); + weightsShape[0] = num_output; // #outChannels + weightsShape[1] = kernel_depth; // #inpChannels / group + weightsShape[2] = 3; // height + weightsShape[3] = 
3; // width + Mat weights(weightsShape, CV_32F, Scalar(1)); + + //assign weights + for (int i = 0; i < weightsShape[0]; ++i) + { + for (int j = 0; j < weightsShape[1]; ++j) + { + for (int k = 0; k < weightsShape[2]; ++k) + { + for (int l = 0; l < weightsShape[3]; ++l) + { + weights.ptr(i, j, k)[l]=-1*(i+1); + } + } + } + } + lp.blobs.push_back(weights); + + //assign bias + Mat bias(1, num_output, CV_32F, Scalar(1)); + for (int i = 0; i < 1; ++i) + { + for (int j = 0; j < num_output; ++j) + { + bias.ptr(i)[j]=j+1; + } + } + lp.blobs.push_back(bias); + net.addLayerToPrev(lp.name, lp.type, lp); + + //layer 2: prelu + LayerParams lpr; + lpr.name = "dw_relu"; + lpr.type = "PReLU"; + Mat weightsp(1, num_output, CV_32F, Scalar(1)); + + //assign weights + for (int i = 0; i < 1; ++i) + { + for (int j = 0; j < num_output; ++j) + { + weightsp.ptr(i)[j]=j+1; + } + } + + lpr.blobs.push_back(weightsp); + net.addLayerToPrev(lpr.name, lpr.type, lpr); + + int shape[] = {1, num_input, 16, 16}; + Mat in_blob(4, &shape[0], CV_32FC1, Scalar(1)); + + net.setInput(in_blob); + Mat out = net.forward(); + + //assign target + std::vector outShape(4); + outShape[0] = 1; + outShape[1] = num_output; // outChannels + outShape[2] = 14; // height + outShape[3] = 14; // width + Mat target(outShape, CV_32F, Scalar(1)); + for (int i = 0; i < outShape[0]; ++i) + { + for (int j = 0; j < outShape[1]; ++j) + { + for (int k = 0; k < outShape[2]; ++k) + { + for (int l = 0; l < outShape[3]; ++l) + { + target.ptr(i, j, k)[l]=(-9*kernel_depth*(j+1)+j+1)*(j+1); + } + } + } + } + + normAssert(out, target); +} +INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_DWconv_Prelu, Combine(Values(3, 6), Values(3, 6))); + #ifdef HAVE_INF_ENGINE // Using Intel's Model Optimizer generate .xml and .bin files: // ./ModelOptimizer -w /path/to/caffemodel -d /path/to/prototxt \ From 8620bd5a84b03933fe24a3d8ba50715b617837b0 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 7 Jun 2018 19:25:01 +0300 Subject: [PATCH 17/33] highgui(win32): improve waitKey() timeout condition - use cv::getTickCount() instead of Win32 GetTickCount() - process message queue before timeout exit --- modules/highgui/src/window_w32.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index 5df18b3c03..ea7b461f90 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -1965,7 +1965,8 @@ static void showSaveDialog(CvWindow* window) CV_IMPL int cvWaitKey( int delay ) { - int time0 = GetTickCount(); + int64 time0 = cv::getTickCount(); + int64 timeEnd = time0 + (int64)(delay * 0.001f * cv::getTickFrequency()); for(;;) { @@ -1973,13 +1974,13 @@ cvWaitKey( int delay ) MSG message; int is_processed = 0; - if( (delay > 0 && abs((int)(GetTickCount() - time0)) >= delay) || hg_windows == 0 ) - return -1; - if( delay <= 0 ) GetMessage(&message, 0, 0, 0); else if( PeekMessage(&message, 0, 0, 0, PM_REMOVE) == FALSE ) { + int64 t = cv::getTickCount(); + if (t - timeEnd >= 0) + return -1; // no messages and no more time Sleep(1); continue; } From b46fef327ea7c10c683e2d26586e235d02ac6831 Mon Sep 17 00:00:00 2001 From: gnthibault Date: Fri, 8 Jun 2018 11:22:36 +0200 Subject: [PATCH 18/33] Fixed Assertin error due to Size.area() overflowing --- modules/imgproc/src/resize.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 211642c48d..0d57e1a933 100644 --- 
a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3672,7 +3672,7 @@ void resize(int src_type, { CV_INSTRUMENT_REGION() - CV_Assert((dst_width * dst_height > 0) || (inv_scale_x > 0 && inv_scale_y > 0)); + CV_Assert((dst_width > 0 && dst_height > 0) || (inv_scale_x > 0 && inv_scale_y > 0)); if (inv_scale_x < DBL_EPSILON || inv_scale_y < DBL_EPSILON) { inv_scale_x = static_cast(dst_width) / src_width; @@ -3684,7 +3684,7 @@ void resize(int src_type, int depth = CV_MAT_DEPTH(src_type), cn = CV_MAT_CN(src_type); Size dsize = Size(saturate_cast(src_width*inv_scale_x), saturate_cast(src_height*inv_scale_y)); - CV_Assert( dsize.area() > 0 ); + CV_Assert( !dsize.empty() ); CV_IPP_RUN_FAST(ipp_resize(src_data, src_step, src_width, src_height, dst_data, dst_step, dsize.width, dsize.height, inv_scale_x, inv_scale_y, depth, cn, interpolation)) @@ -4041,13 +4041,13 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, Size ssize = _src.size(); - CV_Assert( ssize.width > 0 && ssize.height > 0 ); - CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) ); + CV_Assert( !ssize.empty() ); + CV_Assert( !dsize.empty() || (inv_scale_x > 0 && inv_scale_y > 0) ); if( dsize.area() == 0 ) { dsize = Size(saturate_cast(ssize.width*inv_scale_x), saturate_cast(ssize.height*inv_scale_y)); - CV_Assert( dsize.area() > 0 ); + CV_Assert( !dsize.empty() ); } else { From a7047dd4b6dd864d0dc81785a2aaf87a33f0da58 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 8 Jun 2018 14:12:45 +0300 Subject: [PATCH 19/33] samples: add videocapture_camera sample - show capturing information: width / height / fps - show average FPS for cap.read()+imshow() via cv::getTickCount() - optional frame processing code path --- samples/cpp/videocapture_camera.cpp | 76 +++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 samples/cpp/videocapture_camera.cpp diff --git a/samples/cpp/videocapture_camera.cpp b/samples/cpp/videocapture_camera.cpp new file mode 100644 index 0000000000..4d5a341657 --- /dev/null +++ b/samples/cpp/videocapture_camera.cpp @@ -0,0 +1,76 @@ +#include +#include +#include +#include // cv::Canny() +#include + +using namespace cv; +using std::cout; using std::cerr; using std::endl; + +int main(int, char**) +{ + Mat frame; + cout << "Opening camera..." << endl; + VideoCapture capture(-1); // open the first available camera + if (!capture.isOpened()) + { + cerr << "ERROR: Can't initialize camera capture" << endl; + return 1; + } + + cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << endl; + cout << " height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << endl; + cout << "Capturing FPS: " << capture.get(CAP_PROP_FPS) << endl; + + cout << endl << "Press 'ESC' to quit, 'space' to toggle frame processing" << endl; + cout << endl << "Start grabbing..." << endl; + + size_t nFrames = 0; + bool enableProcessing = false; + int64 t0 = cv::getTickCount(); + int64 processingTime = 0; + for (;;) + { + capture >> frame; // read the next frame from camera + if (frame.empty()) + { + cerr << "ERROR: Can't grab camera frame." 
<< endl; + break; + } + nFrames++; + if (nFrames % 10 == 0) + { + const int N = 10; + int64 t1 = cv::getTickCount(); + cout << "Frames captured: " << cv::format("%5lld", (long long int)nFrames) + << " Average FPS: " << cv::format("%9.1f", (double)getTickFrequency() * N / (t1 - t0)) + << " Average time per frame: " << cv::format("%9.2f ms", (double)(t1 - t0) * 1000.0f / (N * getTickFrequency())) + << " Average processing time: " << cv::format("%9.2f ms", (double)(processingTime) * 1000.0f / (N * getTickFrequency())) + << std::endl; + t0 = t1; + processingTime = 0; + } + if (!enableProcessing) + { + imshow("Frame", frame); + } + else + { + int64 tp0 = cv::getTickCount(); + Mat processed; + cv::Canny(frame, processed, 400, 1000, 5); + processingTime += cv::getTickCount() - tp0; + imshow("Frame", processed); + } + int key = waitKey(1); + if (key == 27/*ESC*/) + break; + if (key == 32/*SPACE*/) + { + enableProcessing = !enableProcessing; + cout << "Enable frame processing ('space' key): " << enableProcessing << endl; + } + } + std::cout << "Number of captured frames: " << nFrames << endl; + return nFrames > 0 ? 0 : 1; +} From 9a7f426b7bd7c1a44aec4f698968fbe04f7a36ba Mon Sep 17 00:00:00 2001 From: catree Date: Fri, 8 Jun 2018 15:38:10 +0200 Subject: [PATCH 20/33] Add LD_PRELOAD instruction to be able to run Java code that calls OpenCV functions that use the Intel MKL library. --- .../introduction/java_eclipse/java_eclipse.markdown | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/tutorials/introduction/java_eclipse/java_eclipse.markdown b/doc/tutorials/introduction/java_eclipse/java_eclipse.markdown index 89316de1cf..ed1f527217 100644 --- a/doc/tutorials/introduction/java_eclipse/java_eclipse.markdown +++ b/doc/tutorials/introduction/java_eclipse/java_eclipse.markdown @@ -86,3 +86,16 @@ When you run the code you should see 3x3 identity matrix as output. That is it, whenever you start a new project just add the OpenCV user library that you have defined to your project and you are good to go. Enjoy your powerful, less painful development environment :) + +Running Java code with OpenCV and MKL dependency +------------------------------------------------ + +You may get the following error (e.g. on Ubuntu) if you have built OpenCV with MKL library with some Java code that calls OpenCV functions +that use Intel MKL: +> Intel MKL FATAL ERROR: Cannot load libmkl_avx2.so or libmkl_def.so. + +One solution to solve this on Linux consists in preloading the Intel MKL library (either run the command in a terminal or add it to your `.bashrc` file). +Your command line should be something similar to this (add `$LD_PRELOAD:` before if you have already set the `LD_PRELOAD` variable): +> export LD_PRELOAD=/opt/intel/mkl/lib/intel64/libmkl_core.so:/opt/intel/mkl/lib/intel64/libmkl_sequential.so + +Then, run the Eclipse IDE from a terminal that have this environment variable set (`echo $LD_PRELOAD`) and the error should disappear. 
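+
+For example, assuming `eclipse` is available on your `PATH` and MKL lives in the default
+`/opt/intel/mkl` location (adjust the paths to match your installation), a one-off launch could be:
+> LD_PRELOAD=/opt/intel/mkl/lib/intel64/libmkl_core.so:/opt/intel/mkl/lib/intel64/libmkl_sequential.so eclipse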
From 40765c5f8d2476d91ac5aa85bf018f893b9442cd Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 8 Jun 2018 16:55:21 +0300 Subject: [PATCH 21/33] Enable SSD models from TensorFlow with OpenCL plugin of Intel's Inference Engine --- modules/dnn/perf/perf_net.cpp | 20 ++++++---- modules/dnn/test/test_backends.cpp | 39 +++++++++++-------- modules/dnn/test/test_layers.cpp | 1 + samples/dnn/tf_text_graph_ssd.py | 61 +++++++++++++++++++----------- 4 files changed, 76 insertions(+), 45 deletions(-) diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index 8e777f8ae5..e8569dcf10 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -158,13 +158,19 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe) Mat(cv::Size(300, 300), CV_32FC3)); } -// TODO: update MobileNet model. -PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow) +PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow) { - if (backend == DNN_BACKEND_HALIDE || - backend == DNN_BACKEND_INFERENCE_ENGINE) + if (backend == DNN_BACKEND_HALIDE) + throw SkipTestException(""); + processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "ssd_mobilenet_v1_coco_2017_11_17.pbtxt", "", + Mat(cv::Size(300, 300), CV_32FC3)); +} + +PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow) +{ + if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); - processNet("dnn/ssd_mobilenet_v1_coco.pb", "ssd_mobilenet_v1_coco.pbtxt", "", + processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "ssd_mobilenet_v2_coco_2018_03_29.pbtxt", "", Mat(cv::Size(300, 300), CV_32FC3)); } @@ -217,9 +223,7 @@ PERF_TEST_P_(DNNTestNetwork, opencv_face_detector) PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow) { - if (backend == DNN_BACKEND_HALIDE || - (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL) || - (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)) + if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", "", Mat(cv::Size(300, 300), CV_32FC3)); diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 6f41610fb5..f6563cb3cb 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -38,7 +38,7 @@ public: void processNet(std::string weights, std::string proto, Mat inp, const std::string& outputLayer = "", std::string halideScheduler = "", - double l1 = 0.0, double lInf = 0.0) + double l1 = 0.0, double lInf = 0.0, double detectionConfThresh = 0.2) { if (backend == DNN_BACKEND_OPENCV && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) { @@ -87,7 +87,7 @@ public: } Mat out = net.forward(outputLayer).clone(); - check(outDefault, out, outputLayer, l1, lInf, "First run"); + check(outDefault, out, outputLayer, l1, lInf, detectionConfThresh, "First run"); // Test 2: change input. 
float* inpData = (float*)inp.data; @@ -101,10 +101,11 @@ public: net.setInput(inp); outDefault = netDefault.forward(outputLayer).clone(); out = net.forward(outputLayer).clone(); - check(outDefault, out, outputLayer, l1, lInf, "Second run"); + check(outDefault, out, outputLayer, l1, lInf, detectionConfThresh, "Second run"); } - void check(Mat& ref, Mat& out, const std::string& outputLayer, double l1, double lInf, const char* msg) + void check(Mat& ref, Mat& out, const std::string& outputLayer, double l1, double lInf, + double detectionConfThresh, const char* msg) { if (outputLayer == "detection_out") { @@ -119,7 +120,7 @@ public: } out = out.rowRange(0, numDetections); } - normAssertDetections(ref, out, msg, 0.2, l1, lInf); + normAssertDetections(ref, out, msg, detectionConfThresh, l1, lInf); } else normAssert(ref, out, msg, l1, lInf); @@ -188,20 +189,30 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) inp, "detection_out", "", l1, lInf); } -// TODO: update MobileNet model. -TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow) +TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow) { - if (backend == DNN_BACKEND_HALIDE || - backend == DNN_BACKEND_INFERENCE_ENGINE) + if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); - float l1 = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 0.008 : 0.0; - float lInf = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 0.06 : 0.0; - processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt", + float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0; + float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0; + processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", inp, "detection_out", "", l1, lInf); } +TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow) +{ + if (backend == DNN_BACKEND_HALIDE) + throw SkipTestException(""); + Mat sample = imread(findDataFile("dnn/street.png", false)); + Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); + float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0; + float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.06 : 0.0; + processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt", + inp, "detection_out", "", l1, lInf, 0.25); +} + TEST_P(DNNTestNetwork, SSD_VGG16) { if (backend == DNN_BACKEND_HALIDE && target == DNN_TARGET_CPU) @@ -265,9 +276,7 @@ TEST_P(DNNTestNetwork, opencv_face_detector) TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) { - if (backend == DNN_BACKEND_HALIDE || - (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL) || - (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)) + if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index da7dd775a4..111f354fe4 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -877,6 +877,7 @@ TEST_P(Layer_Test_DWconv_Prelu, Accuracy) int shape[] = {1, num_input, 16, 16}; Mat in_blob(4, &shape[0], CV_32FC1, Scalar(1)); + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setInput(in_blob); Mat out = net.forward(); diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index f5710476a7..851e0d881e 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ b/samples/dnn/tf_text_graph_ssd.py @@ -160,27 +160,40 @@ graph_def.node[1].input.append(weights) # Create SSD postprocessing head ############################################### # Concatenate predictions of classes, predictions of bounding boxes and proposals. +def tensorMsg(values): + if all([isinstance(v, float) for v in values]): + dtype = 'DT_FLOAT' + field = 'float_val' + elif all([isinstance(v, int) for v in values]): + dtype = 'DT_INT32' + field = 'int_val' + else: + raise Exception('Wrong values types') -concatAxis = NodeDef() -concatAxis.name = 'concat/axis_flatten' -concatAxis.op = 'Const' -text_format.Merge( -'tensor {' -' dtype: DT_INT32' -' tensor_shape { }' -' int_val: -1' -'}', concatAxis.attr["value"]) -graph_def.node.extend([concatAxis]) - -def addConcatNode(name, inputs): + msg = 'tensor { dtype: ' + dtype + ' tensor_shape { dim { size: %d } }' % len(values) + for value in values: + msg += '%s: %s ' % (field, str(value)) + return msg + '}' + +def addConstNode(name, values): + node = NodeDef() + node.name = name + node.op = 'Const' + text_format.Merge(tensorMsg(values), node.attr["value"]) + graph_def.node.extend([node]) + +def addConcatNode(name, inputs, axisNodeName): concat = NodeDef() concat.name = name concat.op = 'ConcatV2' for inp in inputs: concat.input.append(inp) - concat.input.append(concatAxis.name) + concat.input.append(axisNodeName) graph_def.node.extend([concat]) +addConstNode('concat/axis_flatten', [-1]) +addConstNode('PriorBox/concat/axis', [-2]) + for label in ['ClassPredictor', 'BoxEncodingPredictor']: concatInputs = [] for i in range(args.num_layers): @@ -193,19 +206,14 @@ for label in ['ClassPredictor', 'BoxEncodingPredictor']: concatInputs.append(flatten.name) graph_def.node.extend([flatten]) - addConcatNode('%s/concat' % label, concatInputs) + addConcatNode('%s/concat' % label, concatInputs, 'concat/axis_flatten') # Add layers that generate anchors (bounding boxes proposals). 
scales = [args.min_scale + (args.max_scale - args.min_scale) * i / (args.num_layers - 1) for i in range(args.num_layers)] + [1.0] -def tensorMsg(values): - msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values) - for value in values: - msg += 'float_val: %f ' % value - return msg + '}' - priorBoxes = [] +addConstNode('reshape_prior_boxes_to_4d', [1, 2, -1, 1]) for i in range(args.num_layers): priorBox = NodeDef() priorBox.name = 'PriorBox_%d' % i @@ -232,9 +240,18 @@ for i in range(args.num_layers): text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"]) graph_def.node.extend([priorBox]) - priorBoxes.append(priorBox.name) -addConcatNode('PriorBox/concat', priorBoxes) + # Reshape from 1x2xN to 1x2xNx1 + reshape = NodeDef() + reshape.name = priorBox.name + '/4d' + reshape.op = 'Reshape' + reshape.input.append(priorBox.name) + reshape.input.append('reshape_prior_boxes_to_4d') + graph_def.node.extend([reshape]) + + priorBoxes.append(reshape.name) + +addConcatNode('PriorBox/concat', priorBoxes, 'PriorBox/concat/axis') # Sigmoid for classes predictions and DetectionOutput layer sigmoid = NodeDef() From c64f6b3c22c6a2877c369ad55a5c45344b7ced0d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 8 Jun 2018 17:06:45 +0300 Subject: [PATCH 22/33] videoio(MSMF): refactor grabFrame() / retrieveFrame() methods - no behavior changes in grabFrame() - added Lock2D() support into retrieveFrame() --- modules/videoio/src/cap_msmf.cpp | 154 ++++++++++++++++++++----------- 1 file changed, 102 insertions(+), 52 deletions(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 0d824b52f6..05ac436da6 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -1045,24 +1045,31 @@ bool CvCapture_MSMF::open(const cv::String& _filename) bool CvCapture_MSMF::grabFrame() { + CV_TRACE_FUNCTION(); if (isOpen) { DWORD streamIndex, flags; if (videoSample) videoSample.Reset(); HRESULT hr; - while(SUCCEEDED(hr = videoFileSource->ReadSample( - dwStreamIndex, // Stream index. - 0, // Flags. - &streamIndex, // Receives the actual stream index. - &flags, // Receives status flags. - &sampleTime, // Receives the time stamp. - &videoSample // Receives the sample or NULL. - )) && - streamIndex == dwStreamIndex && !(flags & (MF_SOURCE_READERF_ERROR|MF_SOURCE_READERF_ALLEFFECTSREMOVED|MF_SOURCE_READERF_ENDOFSTREAM)) && - !videoSample - ) + for(;;) { + CV_TRACE_REGION("ReadSample"); + if (!SUCCEEDED(hr = videoFileSource->ReadSample( + dwStreamIndex, // Stream index. + 0, // Flags. + &streamIndex, // Receives the actual stream index. + &flags, // Receives status flags. + &sampleTime, // Receives the time stamp. + &videoSample // Receives the sample or NULL. + ))) + break; + if (streamIndex != dwStreamIndex) + break; + if (flags & (MF_SOURCE_READERF_ERROR | MF_SOURCE_READERF_ALLEFFECTSREMOVED | MF_SOURCE_READERF_ENDOFSTREAM)) + break; + if (videoSample) + break; if (flags & MF_SOURCE_READERF_STREAMTICK) { DebugPrintOut(L"\tStream tick detected. 
Retrying to grab the frame\n"); @@ -1115,57 +1122,100 @@ bool CvCapture_MSMF::grabFrame() bool CvCapture_MSMF::retrieveFrame(int, cv::OutputArray frame) { - DWORD bcnt; - if (videoSample && SUCCEEDED(videoSample->GetBufferCount(&bcnt)) && bcnt > 0) + CV_TRACE_FUNCTION(); + do { + if (!videoSample) + break; + _ComPtr buf = NULL; - if (SUCCEEDED(videoSample->GetBufferByIndex(0, &buf))) + + CV_TRACE_REGION("get_contiguous_buffer"); + if (!SUCCEEDED(videoSample->ConvertToContiguousBuffer(&buf))) + { + CV_TRACE_REGION("get_buffer"); + DWORD bcnt = 0; + if (!SUCCEEDED(videoSample->GetBufferCount(&bcnt))) + break; + if (bcnt == 0) + break; + if (!SUCCEEDED(videoSample->GetBufferByIndex(0, &buf))) + break; + } + + bool lock2d = false; + BYTE* ptr = NULL; + LONG pitch = 0; + DWORD maxsize = 0, cursize = 0; + + // "For 2-D buffers, the Lock2D method is more efficient than the Lock method" + // see IMFMediaBuffer::Lock method documentation: https://msdn.microsoft.com/en-us/library/windows/desktop/bb970366(v=vs.85).aspx + _ComPtr buffer2d; + if (convertFormat) { - DWORD maxsize, cursize; - BYTE* ptr = NULL; - if (SUCCEEDED(buf->Lock(&ptr, &maxsize, &cursize))) + if (SUCCEEDED(buf.As(&buffer2d))) { - if (convertFormat) + CV_TRACE_REGION_NEXT("lock2d"); + if (SUCCEEDED(buffer2d->Lock2D(&ptr, &pitch))) { - if ((unsigned int)cursize == captureFormat.MF_MT_SAMPLE_SIZE) - { - switch (outputFormat) - { - case CV_CAP_MODE_YUYV: - cv::Mat(captureFormat.height, captureFormat.width, CV_8UC2, ptr).copyTo(frame); - break; - case CV_CAP_MODE_BGR: - if (captureMode == MODE_HW) - cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC4, ptr), frame, cv::COLOR_BGRA2BGR); - else - cv::Mat(captureFormat.height, captureFormat.width, CV_8UC3, ptr).copyTo(frame); - break; - case CV_CAP_MODE_RGB: - if (captureMode == MODE_HW) - cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC4, ptr), frame, cv::COLOR_BGRA2BGR); - else - cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC3, ptr), frame, cv::COLOR_BGR2RGB); - break; - case CV_CAP_MODE_GRAY: - cv::Mat(captureFormat.height, captureFormat.width, CV_8UC1, ptr).copyTo(frame); - break; - default: - frame.release(); - break; - } - } - else - frame.release(); + lock2d = true; } - else + } + } + if (ptr == NULL) + { + CV_Assert(lock2d == false); + CV_TRACE_REGION_NEXT("lock"); + if (!SUCCEEDED(buf->Lock(&ptr, &maxsize, &cursize))) + { + break; + } + } + if (!ptr) + break; + if (convertFormat) + { + if (lock2d || (unsigned int)cursize == captureFormat.MF_MT_SAMPLE_SIZE) + { + switch (outputFormat) { - cv::Mat(1, cursize, CV_8UC1, ptr).copyTo(frame); + case CV_CAP_MODE_YUYV: + cv::Mat(captureFormat.height, captureFormat.width, CV_8UC2, ptr, pitch).copyTo(frame); + break; + case CV_CAP_MODE_BGR: + if (captureMode == MODE_HW) + cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC4, ptr, pitch), frame, cv::COLOR_BGRA2BGR); + else + cv::Mat(captureFormat.height, captureFormat.width, CV_8UC3, ptr, pitch).copyTo(frame); + break; + case CV_CAP_MODE_RGB: + if (captureMode == MODE_HW) + cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC4, ptr, pitch), frame, cv::COLOR_BGRA2BGR); + else + cv::cvtColor(cv::Mat(captureFormat.height, captureFormat.width, CV_8UC3, ptr, pitch), frame, cv::COLOR_BGR2RGB); + break; + case CV_CAP_MODE_GRAY: + cv::Mat(captureFormat.height, captureFormat.width, CV_8UC1, ptr, pitch).copyTo(frame); + break; + default: + frame.release(); + break; } - buf->Unlock(); - return 
!frame.empty(); } + else + frame.release(); } - } + else + { + cv::Mat(1, cursize, CV_8UC1, ptr, pitch).copyTo(frame); + } + CV_TRACE_REGION_NEXT("unlock"); + if (lock2d) + buffer2d->Unlock2D(); + else + buf->Unlock(); + return !frame.empty(); + } while (0); frame.release(); return false; From d3696f3ad226a8ef57282d8af122021b13af9b40 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 8 Jun 2018 18:47:00 +0300 Subject: [PATCH 23/33] ffmpeg: update OpenCV FFmpeg wrapper (3.4) --- 3rdparty/ffmpeg/ffmpeg.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/3rdparty/ffmpeg/ffmpeg.cmake b/3rdparty/ffmpeg/ffmpeg.cmake index 423b409128..8f6ffe5707 100644 --- a/3rdparty/ffmpeg/ffmpeg.cmake +++ b/3rdparty/ffmpeg/ffmpeg.cmake @@ -1,8 +1,8 @@ -# Binaries branch name: ffmpeg/master_20180220 -# Binaries were created for OpenCV: 9819ebc0954c2df62943ebbd5936d325e5dc89e1 -ocv_update(FFMPEG_BINARIES_COMMIT "0a0e88972a7ea97708378d0488a65f83e7cc5e69") -ocv_update(FFMPEG_FILE_HASH_BIN32 "b8120c07962d591e2e9071a1bf566fd0") -ocv_update(FFMPEG_FILE_HASH_BIN64 "dc9c50e7b05482acc25d6ce0ac61bf1d") +# Binaries branch name: ffmpeg/3.4_20180608 +# Binaries were created for OpenCV: f5ddbbf65937d8f44e481e4ee1082961821f5c62 +ocv_update(FFMPEG_BINARIES_COMMIT "8041bd6f5ad37045c258904ba3030bb3442e3911") +ocv_update(FFMPEG_FILE_HASH_BIN32 "fa5a2a4e2f37defcb95bde8ed145c2b3") +ocv_update(FFMPEG_FILE_HASH_BIN64 "2cc08fc4fef8199fe80e0f126684834f") ocv_update(FFMPEG_FILE_HASH_CMAKE "3b90f67f4b429e77d3da36698cef700c") function(download_win_ffmpeg script_var) From cb4b6bb2dc381b669a938877cebdf982313ba9ed Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Wed, 6 Jun 2018 16:00:55 +0300 Subject: [PATCH 24/33] Fixed setting of frame size properties for MSMF-based VideoCapture --- modules/videoio/src/cap_msmf.cpp | 176 ++++++++++++++----------------- 1 file changed, 78 insertions(+), 98 deletions(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 0d824b52f6..52d4c7f75f 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -720,6 +720,7 @@ protected: MediaType nativeFormat; MediaType captureFormat; int outputFormat; + UINT32 requestedWidth, requestedHeight; bool convertFormat; UINT32 aspectN, aspectD; MFTIME duration; @@ -741,12 +742,15 @@ CvCapture_MSMF::CvCapture_MSMF(): videoFileSource(NULL), videoSample(NULL), outputFormat(CV_CAP_MODE_BGR), + requestedWidth(0), + requestedHeight(0), convertFormat(true), aspectN(1), aspectD(1), sampleTime(0), isOpen(false) { + configureHW(true); } CvCapture_MSMF::CvCapture_MSMF(int index) : CvCapture_MSMF() { open(index); } CvCapture_MSMF::CvCapture_MSMF(const cv::String& _filename) : CvCapture_MSMF() { open(_filename); } @@ -754,6 +758,7 @@ CvCapture_MSMF::CvCapture_MSMF(const cv::String& _filename) : CvCapture_MSMF() { CvCapture_MSMF::~CvCapture_MSMF() { close(); + configureHW(false); } void CvCapture_MSMF::close() @@ -823,6 +828,11 @@ bool CvCapture_MSMF::configureHW(bool enable) #endif } +#define UDIFF(res, ref) (ref == 0 ? 0 : res > ref ? 
res - ref : ref - res) +static UINT32 resolutionDiff(MediaType& mType, UINT32 refWidth, UINT32 refHeight) +{ return UDIFF(mType.width, refWidth) + UDIFF(mType.height, refHeight); } +#undef UDIFF + bool CvCapture_MSMF::configureOutput(UINT32 width, UINT32 height, double prefFramerate, UINT32 aspectRatioN, UINT32 aspectRatioD, int outFormat, bool convertToFormat) { if (width != 0 && height != 0 && @@ -830,9 +840,10 @@ bool CvCapture_MSMF::configureOutput(UINT32 width, UINT32 height, double prefFra aspectRatioN == aspectN && aspectRatioD == aspectD && outFormat == outputFormat && convertToFormat == convertFormat) return true; + requestedWidth = width; + requestedHeight = height; + HRESULT hr = S_OK; - int dwStreamFallback = -1; - MediaType MTFallback; int dwStreamBest = -1; MediaType MTBest; @@ -853,31 +864,22 @@ bool CvCapture_MSMF::configureOutput(UINT32 width, UINT32 height, double prefFra MediaType MT(pType.Get()); if (MT.MF_MT_MAJOR_TYPE == MFMediaType_Video) { - if (dwStreamFallback < 0 || - ((MT.width * MT.height) > (MTFallback.width * MTFallback.height)) || - (((MT.width * MT.height) == (MTFallback.width * MTFallback.height)) && getFramerate(MT) > getFramerate(MTFallback) && (prefFramerate == 0 || getFramerate(MT) <= prefFramerate))) + if (dwStreamBest < 0 || + resolutionDiff(MT, width, height) < resolutionDiff(MTBest, width, height) || + (resolutionDiff(MT, width, height) == resolutionDiff(MTBest, width, height) && MT.width > MTBest.width) || + (resolutionDiff(MT, width, height) == resolutionDiff(MTBest, width, height) && MT.width == MTBest.width && MT.height > MTBest.height) || + (MT.width == MTBest.width && MT.height == MTBest.height && (getFramerate(MT) > getFramerate(MTBest) && (prefFramerate == 0 || getFramerate(MT) <= prefFramerate))) + ) { - dwStreamFallback = (int)dwStreamTest; - MTFallback = MT; - } - if (MT.width == width && MT.height == height) - { - if (dwStreamBest < 0 || - (getFramerate(MT) > getFramerate(MTBest) && (prefFramerate == 0 || getFramerate(MT) <= prefFramerate))) - { - dwStreamBest = (int)dwStreamTest; - MTBest = MT; - } + dwStreamBest = (int)dwStreamTest; + MTBest = MT; } } ++dwMediaTypeTest; } } - if (dwStreamBest >= 0 || dwStreamFallback >= 0) + if (dwStreamBest >= 0) { - // Retrieved stream media type - DWORD tryStream = (DWORD)(dwStreamBest >= 0 ? dwStreamBest : dwStreamFallback); - MediaType tryMT = dwStreamBest >= 0 ? MTBest : MTFallback; GUID outSubtype = GUID_NULL; UINT32 outStride = 0; UINT32 outSize = 0; @@ -887,18 +889,18 @@ bool CvCapture_MSMF::configureOutput(UINT32 width, UINT32 height, double prefFra case CV_CAP_MODE_BGR: case CV_CAP_MODE_RGB: outSubtype = captureMode == MODE_HW ? MFVideoFormat_RGB32 : MFVideoFormat_RGB24; // HW accelerated mode support only RGB32 - outStride = (captureMode == MODE_HW ? 4 : 3) * tryMT.width; - outSize = outStride * tryMT.height; + outStride = (captureMode == MODE_HW ? 4 : 3) * MTBest.width; + outSize = outStride * MTBest.height; break; case CV_CAP_MODE_GRAY: outSubtype = MFVideoFormat_NV12; - outStride = tryMT.width; - outSize = outStride * tryMT.height * 3 / 2; + outStride = MTBest.width; + outSize = outStride * MTBest.height * 3 / 2; break; case CV_CAP_MODE_YUYV: outSubtype = MFVideoFormat_YUY2; - outStride = 2 * tryMT.width; - outSize = outStride * tryMT.height; + outStride = 2 * MTBest.width; + outSize = outStride * MTBest.height; break; default: return false; @@ -907,21 +909,21 @@ bool CvCapture_MSMF::configureOutput(UINT32 width, UINT32 height, double prefFra if (// Set the output media type. 
SUCCEEDED(MFCreateMediaType(&mediaTypeOut)) && SUCCEEDED(mediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)) && - SUCCEEDED(mediaTypeOut->SetGUID(MF_MT_SUBTYPE, convertToFormat ? outSubtype : tryMT.MF_MT_SUBTYPE)) && - SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, convertToFormat ? MFVideoInterlace_Progressive : tryMT.MF_MT_INTERLACE_MODE)) && + SUCCEEDED(mediaTypeOut->SetGUID(MF_MT_SUBTYPE, convertToFormat ? outSubtype : MTBest.MF_MT_SUBTYPE)) && + SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, convertToFormat ? MFVideoInterlace_Progressive : MTBest.MF_MT_INTERLACE_MODE)) && SUCCEEDED(MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_PIXEL_ASPECT_RATIO, aspectRatioN, aspectRatioD)) && - SUCCEEDED(MFSetAttributeSize(mediaTypeOut.Get(), MF_MT_FRAME_SIZE, tryMT.width, tryMT.height)) && - SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, convertToFormat ? 1 : tryMT.MF_MT_FIXED_SIZE_SAMPLES)) && - SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_SAMPLE_SIZE, convertToFormat ? outSize : tryMT.MF_MT_SAMPLE_SIZE)) && - SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_DEFAULT_STRIDE, convertToFormat ? outStride : tryMT.MF_MT_DEFAULT_STRIDE)))//Assume BGR24 input + SUCCEEDED(MFSetAttributeSize(mediaTypeOut.Get(), MF_MT_FRAME_SIZE, MTBest.width, MTBest.height)) && + SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, convertToFormat ? 1 : MTBest.MF_MT_FIXED_SIZE_SAMPLES)) && + SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_SAMPLE_SIZE, convertToFormat ? outSize : MTBest.MF_MT_SAMPLE_SIZE)) && + SUCCEEDED(mediaTypeOut->SetUINT32(MF_MT_DEFAULT_STRIDE, convertToFormat ? outStride : MTBest.MF_MT_DEFAULT_STRIDE)))//Assume BGR24 input { if (SUCCEEDED(videoFileSource->SetStreamSelection((DWORD)MF_SOURCE_READER_ALL_STREAMS, false)) && - SUCCEEDED(videoFileSource->SetStreamSelection(tryStream, true)) && - SUCCEEDED(videoFileSource->SetCurrentMediaType(tryStream, NULL, mediaTypeOut.Get())) + SUCCEEDED(videoFileSource->SetStreamSelection((DWORD)dwStreamBest, true)) && + SUCCEEDED(videoFileSource->SetCurrentMediaType((DWORD)dwStreamBest, NULL, mediaTypeOut.Get())) ) { - dwStreamIndex = tryStream; - nativeFormat = tryMT; + dwStreamIndex = (DWORD)dwStreamBest; + nativeFormat = MTBest; aspectN = aspectRatioN; aspectD = aspectRatioD; outputFormat = outFormat; @@ -976,7 +978,7 @@ bool CvCapture_MSMF::open(int _index) { isOpen = true; duration = 0; - if (configureOutput(0, 0, 0, aspectN, aspectD, outputFormat, convertFormat)) + if (configureOutput(640, 480, 0, aspectN, aspectD, outputFormat, convertFormat)) { double fps = getFramerate(nativeFormat); frameStep = (LONGLONG)(fps > 0 ? 1e7 / fps : 0); @@ -1208,19 +1210,19 @@ double CvCapture_MSMF::getProperty( int property_id ) const IAMVideoProcAmp *pProcAmp = NULL; IAMCameraControl *pProcControl = NULL; // image format properties - if (property_id == CV_CAP_PROP_FORMAT) - return outputFormat; - else if (property_id == CV_CAP_PROP_MODE) - return captureMode; - else if (property_id == CV_CAP_PROP_CONVERT_RGB) - return convertFormat ? 1 : 0; - else if (property_id == CV_CAP_PROP_SAR_NUM) - return aspectN; - else if (property_id == CV_CAP_PROP_SAR_DEN) - return aspectD; - else if (isOpen) + if (isOpen) switch (property_id) { + case CV_CAP_PROP_FORMAT: + return outputFormat; + case CV_CAP_PROP_MODE: + return captureMode; + case CV_CAP_PROP_CONVERT_RGB: + return convertFormat ? 
1 : 0; + case CV_CAP_PROP_SAR_NUM: + return aspectN; + case CV_CAP_PROP_SAR_DEN: + return aspectD; case CV_CAP_PROP_FRAME_WIDTH: return captureFormat.width; case CV_CAP_PROP_FRAME_HEIGHT: @@ -1510,64 +1512,42 @@ bool CvCapture_MSMF::setProperty( int property_id, double value ) IAMVideoProcAmp *pProcAmp = NULL; IAMCameraControl *pProcControl = NULL; // image capture properties - if (property_id == CV_CAP_PROP_FORMAT) - { - if (isOpen) - return configureOutput(captureFormat.width, captureFormat.height, getFramerate(nativeFormat), aspectN, aspectD, (int)cvRound(value), convertFormat); - else - outputFormat = (int)cvRound(value); - return true; - } - else if (property_id == CV_CAP_PROP_MODE) - { - switch ((MSMFCapture_Mode)((int)value)) - { - case MODE_SW: - return configureHW(false); - case MODE_HW: - return configureHW(true); - default: - return false; - } - } - else if (property_id == CV_CAP_PROP_CONVERT_RGB) - { - if (isOpen) - return configureOutput(captureFormat.width, captureFormat.height, getFramerate(nativeFormat), aspectN, aspectD, outputFormat, value != 0); - else - convertFormat = value != 0; - return true; - } - else if (property_id == CV_CAP_PROP_SAR_NUM && value > 0) - { - if (isOpen) - return configureOutput(captureFormat.width, captureFormat.height, getFramerate(nativeFormat), (UINT32)cvRound(value), aspectD, outputFormat, convertFormat); - else - aspectN = (UINT32)cvRound(value); - return true; - } - else if (property_id == CV_CAP_PROP_SAR_DEN && value > 0) - { - if (isOpen) - return configureOutput(captureFormat.width, captureFormat.height, getFramerate(nativeFormat), aspectN, (UINT32)cvRound(value), outputFormat, convertFormat); - else - aspectD = (UINT32)cvRound(value); - return true; - } - else if (isOpen) + if (isOpen) switch (property_id) { - case CV_CAP_PROP_FRAME_WIDTH: + case CV_CAP_PROP_MODE: + switch ((MSMFCapture_Mode)((int)value)) + { + case MODE_SW: + return configureHW(false); + case MODE_HW: + return configureHW(true); + default: + return false; + } + case CV_CAP_PROP_FORMAT: + return configureOutput(requestedWidth, requestedHeight, getFramerate(nativeFormat), aspectN, aspectD, (int)cvRound(value), convertFormat); + case CV_CAP_PROP_CONVERT_RGB: + return configureOutput(requestedWidth, requestedHeight, getFramerate(nativeFormat), aspectN, aspectD, outputFormat, value != 0); + case CV_CAP_PROP_SAR_NUM: if (value > 0) - return configureOutput((UINT32)cvRound(value), captureFormat.height, getFramerate(nativeFormat), aspectN, aspectD, outputFormat, convertFormat); + return configureOutput(requestedWidth, requestedHeight, getFramerate(nativeFormat), (UINT32)cvRound(value), aspectD, outputFormat, convertFormat); break; - case CV_CAP_PROP_FRAME_HEIGHT: + case CV_CAP_PROP_SAR_DEN: if (value > 0) - return configureOutput(captureFormat.width, (UINT32)cvRound(value), getFramerate(nativeFormat), aspectN, aspectD, outputFormat, convertFormat); + return configureOutput(requestedWidth, requestedHeight, getFramerate(nativeFormat), aspectN, (UINT32)cvRound(value), outputFormat, convertFormat); + break; + case CV_CAP_PROP_FRAME_WIDTH: + if (value >= 0) + return configureOutput((UINT32)cvRound(value), requestedHeight, getFramerate(nativeFormat), aspectN, aspectD, outputFormat, convertFormat); + break; + case CV_CAP_PROP_FRAME_HEIGHT: + if (value >= 0) + return configureOutput(requestedWidth, (UINT32)cvRound(value), getFramerate(nativeFormat), aspectN, aspectD, outputFormat, convertFormat); break; case CV_CAP_PROP_FPS: if (value >= 0) - return configureOutput(captureFormat.width, 
captureFormat.height, value, aspectN, aspectD, outputFormat, convertFormat); + return configureOutput(requestedWidth, requestedHeight, value, aspectN, aspectD, outputFormat, convertFormat); break; case CV_CAP_PROP_FOURCC: break; From afa5b0cc93ca185153de02007f2c435c8aed1979 Mon Sep 17 00:00:00 2001 From: catree Date: Thu, 7 Jun 2018 20:14:16 +0200 Subject: [PATCH 25/33] Add Java and Python code for cascade classifier and HDR tutorials. --- .../py_face_detection.markdown | 6 +- .../{ldr_debvec.jpg => ldr_debevec.jpg} | Bin .../py_photo/py_hdr/py_hdr.markdown | 34 ++-- .../cascade_classifier.markdown | 24 +++ .../table_of_content_objdetect.markdown | 2 + .../photo/hdr_imaging/hdr_imaging.markdown | 190 +++++++++++++----- .../photo/table_of_content_photo.markdown | 2 + .../objectDetection/objectDetection.cpp | 63 +++--- .../photo/hdr_imaging/hdr_imaging.cpp | 25 ++- .../ObjectDetectionDemo.java | 98 +++++++++ .../photo/hdr_imaging/HDRImagingDemo.java | 102 ++++++++++ .../cascade_classifier/objectDetection.py | 61 ++++++ .../photo/hdr_imaging/hdr_imaging.py | 56 ++++++ 13 files changed, 560 insertions(+), 103 deletions(-) rename doc/py_tutorials/py_photo/py_hdr/images/{ldr_debvec.jpg => ldr_debevec.jpg} (100%) create mode 100644 samples/java/tutorial_code/objectDetection/cascade_classifier/ObjectDetectionDemo.java create mode 100644 samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java create mode 100644 samples/python/tutorial_code/objectDetection/cascade_classifier/objectDetection.py create mode 100644 samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py diff --git a/doc/py_tutorials/py_objdetect/py_face_detection/py_face_detection.markdown b/doc/py_tutorials/py_objdetect/py_face_detection/py_face_detection.markdown index c42ed7312f..3b4308a958 100644 --- a/doc/py_tutorials/py_objdetect/py_face_detection/py_face_detection.markdown +++ b/doc/py_tutorials/py_objdetect/py_face_detection/py_face_detection.markdown @@ -126,9 +126,9 @@ Result looks like below: Additional Resources -------------------- --# Video Lecture on [Face Detection and Tracking](http://www.youtube.com/watch?v=WfdYYNamHZ8) -2. An interesting interview regarding Face Detection by [Adam - Harvey](http://www.makematics.com/research/viola-jones/) +-# Video Lecture on [Face Detection and Tracking](https://www.youtube.com/watch?v=WfdYYNamHZ8) +-# An interesting interview regarding Face Detection by [Adam + Harvey](https://web.archive.org/web/20171204220159/http://www.makematics.com/research/viola-jones/) Exercises --------- diff --git a/doc/py_tutorials/py_photo/py_hdr/images/ldr_debvec.jpg b/doc/py_tutorials/py_photo/py_hdr/images/ldr_debevec.jpg similarity index 100% rename from doc/py_tutorials/py_photo/py_hdr/images/ldr_debvec.jpg rename to doc/py_tutorials/py_photo/py_hdr/images/ldr_debevec.jpg diff --git a/doc/py_tutorials/py_photo/py_hdr/py_hdr.markdown b/doc/py_tutorials/py_photo/py_hdr/py_hdr.markdown index bbfbed4713..497c8a7b34 100644 --- a/doc/py_tutorials/py_photo/py_hdr/py_hdr.markdown +++ b/doc/py_tutorials/py_photo/py_hdr/py_hdr.markdown @@ -27,7 +27,7 @@ merged, it has to be converted back to 8-bit to view it on usual displays. This tonemapping. Additional complexities arise when objects of the scene or camera move between shots, since images with different exposures should be registered and aligned. 
-In this tutorial we show 2 algorithms (Debvec, Robertson) to generate and display HDR image from an +In this tutorial we show 2 algorithms (Debevec, Robertson) to generate and display HDR image from an exposure sequence, and demonstrate an alternative approach called exposure fusion (Mertens), that produces low dynamic range image and does not need the exposure times data. Furthermore, we estimate the camera response function (CRF) which is of great value for many computer @@ -65,14 +65,14 @@ exposure_times = np.array([15.0, 2.5, 0.25, 0.0333], dtype=np.float32) ### 2. Merge exposures into HDR image In this stage we merge the exposure sequence into one HDR image, showing 2 possibilities -which we have in OpenCV. The first method is Debvec and the second one is Robertson. +which we have in OpenCV. The first method is Debevec and the second one is Robertson. Notice that the HDR image is of type float32, and not uint8, as it contains the full dynamic range of all exposure images. @code{.py} # Merge exposures to HDR image -merge_debvec = cv.createMergeDebevec() -hdr_debvec = merge_debvec.process(img_list, times=exposure_times.copy()) +merge_debevec = cv.createMergeDebevec() +hdr_debevec = merge_debevec.process(img_list, times=exposure_times.copy()) merge_robertson = cv.createMergeRobertson() hdr_robertson = merge_robertson.process(img_list, times=exposure_times.copy()) @endcode @@ -86,7 +86,7 @@ we will later have to clip the data in order to avoid overflow. @code{.py} # Tonemap HDR image tonemap1 = cv.createTonemapDurand(gamma=2.2) -res_debvec = tonemap1.process(hdr_debvec.copy()) +res_debevec = tonemap1.process(hdr_debevec.copy()) tonemap2 = cv.createTonemapDurand(gamma=1.3) res_robertson = tonemap2.process(hdr_robertson.copy()) @endcode @@ -111,11 +111,11 @@ integers in the range of [0..255]. @code{.py} # Convert datatype to 8-bit and save -res_debvec_8bit = np.clip(res_debvec*255, 0, 255).astype('uint8') +res_debevec_8bit = np.clip(res_debevec*255, 0, 255).astype('uint8') res_robertson_8bit = np.clip(res_robertson*255, 0, 255).astype('uint8') res_mertens_8bit = np.clip(res_mertens*255, 0, 255).astype('uint8') -cv.imwrite("ldr_debvec.jpg", res_debvec_8bit) +cv.imwrite("ldr_debevec.jpg", res_debevec_8bit) cv.imwrite("ldr_robertson.jpg", res_robertson_8bit) cv.imwrite("fusion_mertens.jpg", res_mertens_8bit) @endcode @@ -127,9 +127,9 @@ You can see the different results but consider that each algorithm have addition extra parameters that you should fit to get your desired outcome. Best practice is to try the different methods and see which one performs best for your scene. -### Debvec: +### Debevec: -![image](images/ldr_debvec.jpg) +![image](images/ldr_debevec.jpg) ### Robertson: @@ -150,9 +150,9 @@ function and use it for the HDR merge. 
@code{.py} # Estimate camera response function (CRF) -cal_debvec = cv.createCalibrateDebevec() -crf_debvec = cal_debvec.process(img_list, times=exposure_times) -hdr_debvec = merge_debvec.process(img_list, times=exposure_times.copy(), response=crf_debvec.copy()) +cal_debevec = cv.createCalibrateDebevec() +crf_debevec = cal_debevec.process(img_list, times=exposure_times) +hdr_debevec = merge_debevec.process(img_list, times=exposure_times.copy(), response=crf_debevec.copy()) cal_robertson = cv.createCalibrateRobertson() crf_robertson = cal_robertson.process(img_list, times=exposure_times) hdr_robertson = merge_robertson.process(img_list, times=exposure_times.copy(), response=crf_robertson.copy()) @@ -166,12 +166,12 @@ For this sequence we got the following estimation: Additional Resources -------------------- -1. Paul E Debevec and Jitendra Malik. Recovering high dynamic range radiance maps from photographs. In ACM SIGGRAPH 2008 classes, page 31. ACM, 2008. -2. Mark A Robertson, Sean Borman, and Robert L Stevenson. Dynamic range improvement through multiple exposures. In Image Processing, 1999. ICIP 99. Proceedings. 1999 International Conference on, volume 3, pages 159–163. IEEE, 1999. -3. Tom Mertens, Jan Kautz, and Frank Van Reeth. Exposure fusion. In Computer Graphics and Applications, 2007. PG'07. 15th Pacific Conference on, pages 382–390. IEEE, 2007. +1. Paul E Debevec and Jitendra Malik. Recovering high dynamic range radiance maps from photographs. In ACM SIGGRAPH 2008 classes, page 31. ACM, 2008. @cite DM97 +2. Mark A Robertson, Sean Borman, and Robert L Stevenson. Dynamic range improvement through multiple exposures. In Image Processing, 1999. ICIP 99. Proceedings. 1999 International Conference on, volume 3, pages 159–163. IEEE, 1999. @cite RB99 +3. Tom Mertens, Jan Kautz, and Frank Van Reeth. Exposure fusion. In Computer Graphics and Applications, 2007. PG'07. 15th Pacific Conference on, pages 382–390. IEEE, 2007. @cite MK07 4. Images from [Wikipedia-HDR](https://en.wikipedia.org/wiki/High-dynamic-range_imaging) Exercises --------- -1. Try all tonemap algorithms: [Drago](http://docs.opencv.org/master/da/d53/classcv_1_1TonemapDrago.html), [Durand](http://docs.opencv.org/master/da/d3d/classcv_1_1TonemapDurand.html), [Mantiuk](http://docs.opencv.org/master/de/d76/classcv_1_1TonemapMantiuk.html) and [Reinhard](http://docs.opencv.org/master/d0/dec/classcv_1_1TonemapReinhard.html). -2. Try changing the parameters in the HDR calibration and tonemap methods. \ No newline at end of file +1. Try all tonemap algorithms: cv::TonemapDrago, cv::TonemapDurand, cv::TonemapMantiuk and cv::TonemapReinhard +2. Try changing the parameters in the HDR calibration and tonemap methods. diff --git a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.markdown b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.markdown index 093588f88a..3c7bf6b90c 100644 --- a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.markdown +++ b/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.markdown @@ -17,9 +17,23 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. You can also download it from [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp) @include samples/cpp/tutorial_code/objectDetection/objectDetection.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. 
You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/objectDetection/cascade_classifier/ObjectDetectionDemo.java) +@include samples/java/tutorial_code/objectDetection/cascade_classifier/ObjectDetectionDemo.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/objectDetection/cascade_classifier/objectDetection.py) +@include samples/python/tutorial_code/objectDetection/cascade_classifier/objectDetection.py +@end_toggle Explanation ----------- @@ -40,3 +54,13 @@ Result detection. For the eyes we keep using the file used in the tutorial. ![](images/Cascade_Classifier_Tutorial_Result_LBP.jpg) + +Additional Resources +-------------------- + +-# Paul Viola and Michael J. Jones. Robust real-time face detection. International Journal of Computer Vision, 57(2):137–154, 2004. @cite Viola04 +-# Rainer Lienhart and Jochen Maydt. An extended set of haar-like features for rapid object detection. In Image Processing. 2002. Proceedings. 2002 International Conference on, volume 1, pages I–900. IEEE, 2002. @cite Lienhart02 +-# Video Lecture on [Face Detection and Tracking](https://www.youtube.com/watch?v=WfdYYNamHZ8) +-# An interesting interview regarding Face Detection by [Adam + Harvey](https://web.archive.org/web/20171204220159/http://www.makematics.com/research/viola-jones/) +-# [OpenCV Face Detection: Visualized](https://vimeo.com/12774628) on Vimeo by Adam Harvey diff --git a/doc/tutorials/objdetect/table_of_content_objdetect.markdown b/doc/tutorials/objdetect/table_of_content_objdetect.markdown index e8f4fbc1bf..0b019d88a5 100644 --- a/doc/tutorials/objdetect/table_of_content_objdetect.markdown +++ b/doc/tutorials/objdetect/table_of_content_objdetect.markdown @@ -5,6 +5,8 @@ Ever wondered how your digital camera detects peoples and faces? Look here to fi - @subpage tutorial_cascade_classifier + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Ana Huamán diff --git a/doc/tutorials/photo/hdr_imaging/hdr_imaging.markdown b/doc/tutorials/photo/hdr_imaging/hdr_imaging.markdown index 07f993da28..b26c8f4877 100644 --- a/doc/tutorials/photo/hdr_imaging/hdr_imaging.markdown +++ b/doc/tutorials/photo/hdr_imaging/hdr_imaging.markdown @@ -31,21 +31,51 @@ Exposure sequence Source Code ----------- -@include cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp +@add_toggle_cpp +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp) +@include samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java) +@include samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. 
You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py) +@include samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py +@end_toggle + +Sample images +------------- + +Data directory that contains images, exposure times and `list.txt` file can be downloaded from +[here](https://github.com/opencv/opencv_extra/tree/3.4/testdata/cv/hdr/exposures). Explanation ----------- --# **Load images and exposure times** - @code{.cpp} - vector images; - vector times; - loadExposureSeq(argv[1], images, times); - @endcode - Firstly we load input images and exposure times from user-defined folder. The folder should - contain images and *list.txt* - file that contains file names and inverse exposure times. +- **Load images and exposure times** + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp Load images and exposure times +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java Load images and exposure times +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py Load images and exposure times +@end_toggle + +Firstly we load input images and exposure times from user-defined folder. The folder should +contain images and *list.txt* - file that contains file names and inverse exposure times. - For our image sequence the list is following: +For our image sequence the list is following: @code{.none} memorial00.png 0.03125 memorial01.png 0.0625 @@ -53,53 +83,96 @@ Explanation memorial15.png 1024 @endcode --# **Estimate camera response** - @code{.cpp} - Mat response; - Ptr calibrate = createCalibrateDebevec(); - calibrate->process(images, response, times); - @endcode - It is necessary to know camera response function (CRF) for a lot of HDR construction algorithms. - We use one of the calibration algorithms to estimate inverse CRF for all 256 pixel values. - --# **Make HDR image** -@code{.cpp} -Mat hdr; -Ptr merge_debevec = createMergeDebevec(); -merge_debevec->process(images, hdr, times, response); -@endcode +- **Estimate camera response** + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp Estimate camera response +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java Estimate camera response +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py Estimate camera response +@end_toggle + +It is necessary to know camera response function (CRF) for a lot of HDR construction algorithms. +We use one of the calibration algorithms to estimate inverse CRF for all 256 pixel values. + +- **Make HDR image** + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp Make HDR image +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java Make HDR image +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py Make HDR image +@end_toggle + We use Debevec's weighting scheme to construct HDR image using response calculated in the previous item. --# **Tonemap HDR image** - @code{.cpp} - Mat ldr; - Ptr tonemap = createTonemapDurand(2.2f); - tonemap->process(hdr, ldr); - @endcode - Since we want to see our results on common LDR display we have to map our HDR image to 8-bit range - preserving most details. 
It is the main goal of tonemapping methods. We use tonemapper with - bilateral filtering and set 2.2 as the value for gamma correction. - --# **Perform exposure fusion** - @code{.cpp} - Mat fusion; - Ptr merge_mertens = createMergeMertens(); - merge_mertens->process(images, fusion); - @endcode - There is an alternative way to merge our exposures in case when we don't need HDR image. This - process is called exposure fusion and produces LDR image that doesn't require gamma correction. It - also doesn't use exposure values of the photographs. - --# **Write results** - @code{.cpp} - imwrite("fusion.png", fusion * 255); - imwrite("ldr.png", ldr * 255); - imwrite("hdr.hdr", hdr); - @endcode - Now it's time to look at the results. Note that HDR image can't be stored in one of common image - formats, so we save it to Radiance image (.hdr). Also all HDR imaging functions return results in - [0, 1] range so we should multiply result by 255. +- **Tonemap HDR image** + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp Tonemap HDR image +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java Tonemap HDR image +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py Tonemap HDR image +@end_toggle + +Since we want to see our results on common LDR display we have to map our HDR image to 8-bit range +preserving most details. It is the main goal of tonemapping methods. We use tonemapper with +bilateral filtering and set 2.2 as the value for gamma correction. + +- **Perform exposure fusion** + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp Perform exposure fusion +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java Perform exposure fusion +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py Perform exposure fusion +@end_toggle + +There is an alternative way to merge our exposures in case when we don't need HDR image. This +process is called exposure fusion and produces LDR image that doesn't require gamma correction. It +also doesn't use exposure values of the photographs. + +- **Write results** + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp Write results +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java Write results +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py Write results +@end_toggle + +Now it's time to look at the results. Note that HDR image can't be stored in one of common image +formats, so we save it to Radiance image (.hdr). Also all HDR imaging functions return results in +[0, 1] range so we should multiply result by 255. + +You can try other tonemap algorithms: cv::TonemapDrago, cv::TonemapDurand, cv::TonemapMantiuk and cv::TonemapReinhard +You can also adjust the parameters in the HDR calibration and tonemap methods for your own photos. Results ------- @@ -111,3 +184,12 @@ Results ### Exposure fusion ![](images/fusion.png) + +Additional Resources +-------------------- + +1. Paul E Debevec and Jitendra Malik. Recovering high dynamic range radiance maps from photographs. In ACM SIGGRAPH 2008 classes, page 31. ACM, 2008. @cite DM97 +2. Mark A Robertson, Sean Borman, and Robert L Stevenson. Dynamic range improvement through multiple exposures. 
In Image Processing, 1999. ICIP 99. Proceedings. 1999 International Conference on, volume 3, pages 159–163. IEEE, 1999. @cite RB99 +3. Tom Mertens, Jan Kautz, and Frank Van Reeth. Exposure fusion. In Computer Graphics and Applications, 2007. PG'07. 15th Pacific Conference on, pages 382–390. IEEE, 2007. @cite MK07 +4. [Wikipedia-HDR](https://en.wikipedia.org/wiki/High-dynamic-range_imaging) +5. [Recovering High Dynamic Range Radiance Maps from Photographs (webpage)](http://www.pauldebevec.com/Research/HDR/) diff --git a/doc/tutorials/photo/table_of_content_photo.markdown b/doc/tutorials/photo/table_of_content_photo.markdown index 4e72029a18..357c36996e 100644 --- a/doc/tutorials/photo/table_of_content_photo.markdown +++ b/doc/tutorials/photo/table_of_content_photo.markdown @@ -5,6 +5,8 @@ Use OpenCV for advanced photo processing. - @subpage tutorial_hdr_imaging + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 3.0 *Author:* Fedor Morozov diff --git a/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp b/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp index 6aa3f91a43..2115549e30 100644 --- a/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp +++ b/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp @@ -2,7 +2,7 @@ #include "opencv2/highgui.hpp" #include "opencv2/imgproc.hpp" -#include +#include using namespace std; using namespace cv; @@ -11,48 +11,63 @@ using namespace cv; void detectAndDisplay( Mat frame ); /** Global variables */ -String face_cascade_name, eyes_cascade_name; CascadeClassifier face_cascade; CascadeClassifier eyes_cascade; -String window_name = "Capture - Face detection"; /** @function main */ int main( int argc, const char** argv ) { CommandLineParser parser(argc, argv, - "{help h||}" - "{face_cascade|../../data/haarcascades/haarcascade_frontalface_alt.xml|}" - "{eyes_cascade|../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml|}"); + "{help h||}" + "{face_cascade|../../data/haarcascades/haarcascade_frontalface_alt.xml|Path to face cascade.}" + "{eyes_cascade|../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml|Path to eyes cascade.}" + "{camera|0|Camera device number.}"); parser.about( "\nThis program demonstrates using the cv::CascadeClassifier class to detect objects (Face + eyes) in a video stream.\n" "You can use Haar or LBP features.\n\n" ); parser.printMessage(); - face_cascade_name = parser.get("face_cascade"); - eyes_cascade_name = parser.get("eyes_cascade"); - VideoCapture capture; - Mat frame; + String face_cascade_name = parser.get("face_cascade"); + String eyes_cascade_name = parser.get("eyes_cascade"); //-- 1. Load the cascades - if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading face cascade\n"); return -1; }; - if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading eyes cascade\n"); return -1; }; + if( !face_cascade.load( face_cascade_name ) ) + { + cout << "--(!)Error loading face cascade\n"; + return -1; + }; + if( !eyes_cascade.load( eyes_cascade_name ) ) + { + cout << "--(!)Error loading eyes cascade\n"; + return -1; + }; + int camera_device = parser.get("camera"); + VideoCapture capture; //-- 2. Read the video stream - capture.open( 0 ); - if ( ! capture.isOpened() ) { printf("--(!)Error opening video capture\n"); return -1; } + capture.open( camera_device ); + if ( ! capture.isOpened() ) + { + cout << "--(!)Error opening video capture\n"; + return -1; + } + Mat frame; while ( capture.read(frame) ) { if( frame.empty() ) { - printf(" --(!) 
No captured frame -- Break!"); + cout << "--(!) No captured frame -- Break!\n"; break; } //-- 3. Apply the classifier to the frame detectAndDisplay( frame ); - if( waitKey(10) == 27 ) { break; } // escape + if( waitKey(10) == 27 ) + { + break; // escape + } } return 0; } @@ -60,33 +75,33 @@ int main( int argc, const char** argv ) /** @function detectAndDisplay */ void detectAndDisplay( Mat frame ) { - std::vector faces; Mat frame_gray; - cvtColor( frame, frame_gray, COLOR_BGR2GRAY ); equalizeHist( frame_gray, frame_gray ); //-- Detect faces - face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size(60, 60) ); + std::vector faces; + face_cascade.detectMultiScale( frame_gray, faces ); for ( size_t i = 0; i < faces.size(); i++ ) { Point center( faces[i].x + faces[i].width/2, faces[i].y + faces[i].height/2 ); - ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2 ), 0, 0, 360, Scalar( 255, 0, 255 ), 4, 8, 0 ); + ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2 ), 0, 0, 360, Scalar( 255, 0, 255 ), 4 ); Mat faceROI = frame_gray( faces[i] ); - std::vector eyes; //-- In each face, detect eyes - eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0 |CASCADE_SCALE_IMAGE, Size(30, 30) ); + std::vector eyes; + eyes_cascade.detectMultiScale( faceROI, eyes ); for ( size_t j = 0; j < eyes.size(); j++ ) { Point eye_center( faces[i].x + eyes[j].x + eyes[j].width/2, faces[i].y + eyes[j].y + eyes[j].height/2 ); int radius = cvRound( (eyes[j].width + eyes[j].height)*0.25 ); - circle( frame, eye_center, radius, Scalar( 255, 0, 0 ), 4, 8, 0 ); + circle( frame, eye_center, radius, Scalar( 255, 0, 0 ), 4 ); } } + //-- Show what you got - imshow( window_name, frame ); + imshow( "Capture - Face detection", frame ); } diff --git a/samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp b/samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp index 6ff6687678..492e1524c7 100644 --- a/samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp +++ b/samples/cpp/tutorial_code/photo/hdr_imaging/hdr_imaging.cpp @@ -1,6 +1,7 @@ -#include +#include "opencv2/photo.hpp" #include "opencv2/imgcodecs.hpp" -#include +#include "opencv2/highgui.hpp" + #include #include #include @@ -10,38 +11,52 @@ using namespace std; void loadExposureSeq(String, vector&, vector&); -int main(int, char**argv) +int main(int argc, char**argv) { + CommandLineParser parser( argc, argv, "{@input | | Input directory that contains images and exposure times. }" ); + + //! [Load images and exposure times] vector images; vector times; - loadExposureSeq(argv[1], images, times); + loadExposureSeq(parser.get( "@input" ), images, times); + //! [Load images and exposure times] + //! [Estimate camera response] Mat response; Ptr calibrate = createCalibrateDebevec(); calibrate->process(images, response, times); + //! [Estimate camera response] + //! [Make HDR image] Mat hdr; Ptr merge_debevec = createMergeDebevec(); merge_debevec->process(images, hdr, times, response); + //! [Make HDR image] + //! [Tonemap HDR image] Mat ldr; Ptr tonemap = createTonemapDurand(2.2f); tonemap->process(hdr, ldr); + //! [Tonemap HDR image] + //! [Perform exposure fusion] Mat fusion; Ptr merge_mertens = createMergeMertens(); merge_mertens->process(images, fusion); + //! [Perform exposure fusion] + //! [Write results] imwrite("fusion.png", fusion * 255); imwrite("ldr.png", ldr * 255); imwrite("hdr.hdr", hdr); + //! 
[Write results] return 0; } void loadExposureSeq(String path, vector& images, vector& times) { - path = path + std::string("/"); + path = path + "/"; ifstream list_file((path + "list.txt").c_str()); string name; float val; diff --git a/samples/java/tutorial_code/objectDetection/cascade_classifier/ObjectDetectionDemo.java b/samples/java/tutorial_code/objectDetection/cascade_classifier/ObjectDetectionDemo.java new file mode 100644 index 0000000000..33e0b5152e --- /dev/null +++ b/samples/java/tutorial_code/objectDetection/cascade_classifier/ObjectDetectionDemo.java @@ -0,0 +1,98 @@ +import java.util.List; + +import org.opencv.core.Core; +import org.opencv.core.Mat; +import org.opencv.core.MatOfRect; +import org.opencv.core.Point; +import org.opencv.core.Rect; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.highgui.HighGui; +import org.opencv.imgproc.Imgproc; +import org.opencv.objdetect.CascadeClassifier; +import org.opencv.videoio.VideoCapture; + +class ObjectDetection { + public void detectAndDisplay(Mat frame, CascadeClassifier faceCascade, CascadeClassifier eyesCascade) { + Mat frameGray = new Mat(); + Imgproc.cvtColor(frame, frameGray, Imgproc.COLOR_BGR2GRAY); + Imgproc.equalizeHist(frameGray, frameGray); + + // -- Detect faces + MatOfRect faces = new MatOfRect(); + faceCascade.detectMultiScale(frameGray, faces); + + List listOfFaces = faces.toList(); + for (Rect face : listOfFaces) { + Point center = new Point(face.x + face.width / 2, face.y + face.height / 2); + Imgproc.ellipse(frame, center, new Size(face.width / 2, face.height / 2), 0, 0, 360, + new Scalar(255, 0, 255)); + + Mat faceROI = frameGray.submat(face); + + // -- In each face, detect eyes + MatOfRect eyes = new MatOfRect(); + eyesCascade.detectMultiScale(faceROI, eyes); + + List listOfEyes = eyes.toList(); + for (Rect eye : listOfEyes) { + Point eyeCenter = new Point(face.x + eye.x + eye.width / 2, face.y + eye.y + eye.height / 2); + int radius = (int) Math.round((eye.width + eye.height) * 0.25); + Imgproc.circle(frame, eyeCenter, radius, new Scalar(255, 0, 0), 4); + } + } + + //-- Show what you got + HighGui.imshow("Capture - Face detection", frame ); + } + + public void run(String[] args) { + String filenameFaceCascade = args.length > 2 ? args[0] : "../../data/haarcascades/haarcascade_frontalface_alt.xml"; + String filenameEyesCascade = args.length > 2 ? args[1] : "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml"; + int cameraDevice = args.length > 2 ? Integer.parseInt(args[2]) : 0; + + CascadeClassifier faceCascade = new CascadeClassifier(); + CascadeClassifier eyesCascade = new CascadeClassifier(); + + if (!faceCascade.load(filenameFaceCascade)) { + System.err.println("--(!)Error loading face cascade: " + filenameFaceCascade); + System.exit(0); + } + if (!eyesCascade.load(filenameEyesCascade)) { + System.err.println("--(!)Error loading eyes cascade: " + filenameEyesCascade); + System.exit(0); + } + + VideoCapture capture = new VideoCapture(cameraDevice); + if (!capture.isOpened()) { + System.err.println("--(!)Error opening video capture"); + System.exit(0); + } + + Mat frame = new Mat(); + while (capture.read(frame)) { + if (frame.empty()) { + System.err.println("--(!) No captured frame -- Break!"); + break; + } + + //-- 3. 
Apply the classifier to the frame + detectAndDisplay(frame, faceCascade, eyesCascade); + + if (HighGui.waitKey(10) == 27) { + break;// escape + } + } + + System.exit(0); + } +} + +public class ObjectDetectionDemo { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + new ObjectDetection().run(args); + } +} diff --git a/samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java b/samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java new file mode 100644 index 0000000000..9d96d85f2f --- /dev/null +++ b/samples/java/tutorial_code/photo/hdr_imaging/HDRImagingDemo.java @@ -0,0 +1,102 @@ +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.photo.CalibrateDebevec; +import org.opencv.photo.MergeDebevec; +import org.opencv.photo.MergeMertens; +import org.opencv.photo.Photo; +import org.opencv.photo.TonemapDurand; + +class HDRImaging { + public void loadExposureSeq(String path, List images, List times) { + path += "/"; + + List lines; + try { + lines = Files.readAllLines(Paths.get(path + "list.txt")); + + for (String line : lines) { + String[] splitStr = line.split("\\s+"); + if (splitStr.length == 2) { + String name = splitStr[0]; + Mat img = Imgcodecs.imread(path + name); + images.add(img); + float val = Float.parseFloat(splitStr[1]); + times.add(1/ val); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + public void run(String[] args) { + String path = args.length > 0 ? args[0] : ""; + if (path.isEmpty()) { + System.out.println("Path is empty. Use the directory that contains images and exposure times."); + System.exit(0); + } + + //! [Load images and exposure times] + List images = new ArrayList<>(); + List times = new ArrayList<>(); + loadExposureSeq(path, images, times); + //! [Load images and exposure times] + + //! [Estimate camera response] + Mat response = new Mat(); + CalibrateDebevec calibrate = Photo.createCalibrateDebevec(); + Mat matTimes = new Mat(times.size(), 1, CvType.CV_32F); + float[] arrayTimes = new float[(int) (matTimes.total()*matTimes.channels())]; + for (int i = 0; i < times.size(); i++) { + arrayTimes[i] = times.get(i); + } + matTimes.put(0, 0, arrayTimes); + calibrate.process(images, response, matTimes); + //! [Estimate camera response] + + //! [Make HDR image] + Mat hdr = new Mat(); + MergeDebevec mergeDebevec = Photo.createMergeDebevec(); + mergeDebevec.process(images, hdr, matTimes); + //! [Make HDR image] + + //! [Tonemap HDR image] + Mat ldr = new Mat(); + TonemapDurand tonemap = Photo.createTonemapDurand(); + tonemap.process(hdr, ldr); + //! [Tonemap HDR image] + + //! [Perform exposure fusion] + Mat fusion = new Mat(); + MergeMertens mergeMertens = Photo.createMergeMertens(); + mergeMertens.process(images, fusion); + //! [Perform exposure fusion] + + //! [Write results] + fusion = fusion.mul(fusion, 255); + ldr = ldr.mul(ldr, 255); + Imgcodecs.imwrite("fusion.png", fusion); + Imgcodecs.imwrite("ldr.png", ldr); + Imgcodecs.imwrite("hdr.hdr", hdr); + //! 
[Write results] + + System.exit(0); + } +} + +public class HDRImagingDemo { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + new HDRImaging().run(args); + } +} diff --git a/samples/python/tutorial_code/objectDetection/cascade_classifier/objectDetection.py b/samples/python/tutorial_code/objectDetection/cascade_classifier/objectDetection.py new file mode 100644 index 0000000000..343c9d66fa --- /dev/null +++ b/samples/python/tutorial_code/objectDetection/cascade_classifier/objectDetection.py @@ -0,0 +1,61 @@ +from __future__ import print_function +import cv2 as cv +import argparse + +def detectAndDisplay(frame): + frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) + frame_gray = cv.equalizeHist(frame_gray) + + #-- Detect faces + faces = face_cascade.detectMultiScale(frame_gray) + for (x,y,w,h) in faces: + center = (x + w//2, y + h//2) + frame = cv.ellipse(frame, center, (w//2, h//2), 0, 0, 360, (255, 0, 255), 4) + + faceROI = frame_gray[y:y+h,x:x+w] + #-- In each face, detect eyes + eyes = eyes_cascade.detectMultiScale(faceROI) + for (x2,y2,w2,h2) in eyes: + eye_center = (x + x2 + w2//2, y + y2 + h2//2) + radius = int(round((w2 + h2)*0.25)) + frame = cv.circle(frame, eye_center, radius, (255, 0, 0 ), 4) + + cv.imshow('Capture - Face detection', frame) + +parser = argparse.ArgumentParser(description='Code for Cascade Classifier tutorial.') +parser.add_argument('--face_cascade', help='Path to face cascade.', default='../../data/haarcascades/haarcascade_frontalface_alt.xml') +parser.add_argument('--eyes_cascade', help='Path to eyes cascade.', default='../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml') +parser.add_argument('--camera', help='Camera devide number.', type=int, default=0) +args = parser.parse_args() + +face_cascade_name = args.face_cascade +eyes_cascade_name = args.eyes_cascade + +face_cascade = cv.CascadeClassifier() +eyes_cascade = cv.CascadeClassifier() + +#-- 1. Load the cascades +if not face_cascade.load(face_cascade_name): + print('--(!)Error loading face cascade') + exit(0) +if not eyes_cascade.load(eyes_cascade_name): + print('--(!)Error loading eyes cascade') + exit(0) + +camera_device = args.camera +#-- 2. Read the video stream +cap = cv.VideoCapture(camera_device) +if not cap.isOpened: + print('--(!)Error opening video capture') + exit(0) + +while True: + ret, frame = cap.read() + if frame is None: + print('--(!) 
No captured frame -- Break!') + break + + detectAndDisplay(frame) + + if cv.waitKey(10) == 27: + break diff --git a/samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py b/samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py new file mode 100644 index 0000000000..11243309b9 --- /dev/null +++ b/samples/python/tutorial_code/photo/hdr_imaging/hdr_imaging.py @@ -0,0 +1,56 @@ +from __future__ import print_function +from __future__ import division +import cv2 as cv +import numpy as np +import argparse +import os + +def loadExposureSeq(path): + images = [] + times = [] + with open(os.path.join(path, 'list.txt')) as f: + content = f.readlines() + for line in content: + tokens = line.split() + images.append(cv.imread(os.path.join(path, tokens[0]))) + times.append(1 / float(tokens[1])) + + return images, np.asarray(times, dtype=np.float32) + +parser = argparse.ArgumentParser(description='Code for High Dynamic Range Imaging tutorial.') +parser.add_argument('--input', type=str, help='Path to the directory that contains images and exposure times.') +args = parser.parse_args() + +if not args.input: + parser.print_help() + exit(0) + +## [Load images and exposure times] +images, times = loadExposureSeq(args.input) +## [Load images and exposure times] + +## [Estimate camera response] +calibrate = cv.createCalibrateDebevec() +response = calibrate.process(images, times) +## [Estimate camera response] + +## [Make HDR image] +merge_debevec = cv.createMergeDebevec() +hdr = merge_debevec.process(images, times, response) +## [Make HDR image] + +## [Tonemap HDR image] +tonemap = cv.createTonemapDurand(2.2) +ldr = tonemap.process(hdr) +## [Tonemap HDR image] + +## [Perform exposure fusion] +merge_mertens = cv.createMergeMertens() +fusion = merge_mertens.process(images) +## [Perform exposure fusion] + +## [Write results] +cv.imwrite('fusion.png', fusion * 255) +cv.imwrite('ldr.png', ldr * 255) +cv.imwrite('hdr.hdr', hdr) +## [Write results] From e8a520ed9662747970fd6b03fe742f116d93c83b Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 9 Jun 2018 12:48:26 +0300 Subject: [PATCH 26/33] cmake: add BUILD_APPS_LIST - build "opencv_version" only: `cmake -DBUILD_APPS_LIST=version ...` - option 'BUILD_opencv_apps' should be 'ON' --- apps/CMakeLists.txt | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index ed663752dd..95e98e6e08 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -3,9 +3,22 @@ add_definitions(-D__OPENCV_APPS=1) link_libraries(${OPENCV_LINKER_LIBS}) -add_subdirectory(traincascade) -add_subdirectory(createsamples) -add_subdirectory(annotation) -add_subdirectory(visualisation) -add_subdirectory(interactive-calibration) -add_subdirectory(version) +macro(ocv_add_app directory) + if(DEFINED BUILD_APPS_LIST) + list(FIND BUILD_APPS_LIST ${directory} _index) + if (${_index} GREATER -1) + add_subdirectory(${directory}) + else() + message(STATUS "Skip OpenCV app: ${directory}") + endif() + else() + add_subdirectory(${directory}) + endif() +endmacro() + +ocv_add_app(traincascade) +ocv_add_app(createsamples) +ocv_add_app(annotation) +ocv_add_app(visualisation) +ocv_add_app(interactive-calibration) +ocv_add_app(version) From 390f0a5dcdc9951c3e485cf40c1ec66ddb1be845 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 9 Jun 2018 13:08:50 +0300 Subject: [PATCH 27/33] cmake: generate and install ffmpeg-download.ps1 64-bit only --- .gitattributes | 2 + 3rdparty/ffmpeg/ffmpeg-download.ps1.in | 63 
++++++++++++++++++++++++++ 3rdparty/ffmpeg/ffmpeg.cmake | 5 ++ 3 files changed, 70 insertions(+) create mode 100644 3rdparty/ffmpeg/ffmpeg-download.ps1.in diff --git a/.gitattributes b/.gitattributes index 94cf87928a..602676c977 100644 --- a/.gitattributes +++ b/.gitattributes @@ -81,6 +81,8 @@ org.eclipse.jdt.core.prefs -text whitespace=cr-at-eol merge=union *.cmd text eol=crlf *.cmd.tmpl text eol=crlf *.dsp text eol=crlf -whitespace +*.ps1 text eol=crlf +*.ps1.in text eol=crlf *.sln text eol=crlf -whitespace *.vcproj text eol=crlf -whitespace merge=union *.vcxproj text eol=crlf -whitespace merge=union diff --git a/3rdparty/ffmpeg/ffmpeg-download.ps1.in b/3rdparty/ffmpeg/ffmpeg-download.ps1.in new file mode 100644 index 0000000000..589aa143e8 --- /dev/null +++ b/3rdparty/ffmpeg/ffmpeg-download.ps1.in @@ -0,0 +1,63 @@ +$url = "https://raw.githubusercontent.com/opencv/opencv_3rdparty/@FFMPEG_BINARIES_COMMIT@/ffmpeg/opencv_ffmpeg_64.dll" +$expected_md5 = "@FFMPEG_FILE_HASH_BIN64@" +$output = "$PSScriptRoot\@OPENCV_BIN_INSTALL_PATH@\opencv_ffmpeg@OPENCV_DLLVERSION@_64.dll" + +Write-Output ("=" * 120) +try { + Get-content -Path "$PSScriptRoot\etc\licenses\ffmpeg-readme.txt" -ErrorAction 'Stop' +} catch { + Write-Output "Refer to OpenCV FFmpeg wrapper readme notes about library usage / licensing details." +} +Write-Output ("=" * 120) +Write-Output "" + +if(![System.IO.File]::Exists($output)) { + try { + Write-Output ("Downloading: " + $output) + Import-Module BitsTransfer + $start_time = Get-Date + Start-BitsTransfer -Source $url -Destination $output -ErrorAction 'Stop' + Write-Output "Downloaded in $((Get-Date).Subtract($start_time).Seconds) seconds" + } catch { + $_ # Dump error + try { + Write-Output ("Downloading (second attempt): " + $output) + $start_time = Get-Date + Invoke-WebRequest -Uri $url -OutFile $output + Write-Output "Downloaded in $((Get-Date).Subtract($start_time).Seconds) seconds" + } catch { + Write-Output ("Can't download file: " + $output) + Write-Output ("URL: " + $url) + Write-Output "You need to download this file manually. Stop" + Pause + Exit + } + } +} else { + Write-Output ("File exists: " + $output) + Write-Output ("Downloading is skipped. Remove this file and re-run this script to force downloading.") +} + +if(![System.IO.File]::Exists($output)) { + Write-Output ("Destination file not found: " + $output) + Write-Output "Stop" + Pause + Exit +} + +try { + $hash = Get-FileHash $output -Algorithm MD5 -ErrorAction 'Stop' + + if($hash.Hash -eq $expected_md5) { + Write-Output "MD5 check passed" + } else { + Write-Output ("MD5 : " + $hash.Hash.toLower()) + Write-Output ("Expected: " + $expected_md5) + Write-Output "MD5 hash mismatch" + } +} catch { + $_ # Dump error + Write-Output "Can't check MD5 hash (requires PowerShell 4+)" +} +Pause +Write-Output "Exit" diff --git a/3rdparty/ffmpeg/ffmpeg.cmake b/3rdparty/ffmpeg/ffmpeg.cmake index 423b409128..5414b8f29e 100644 --- a/3rdparty/ffmpeg/ffmpeg.cmake +++ b/3rdparty/ffmpeg/ffmpeg.cmake @@ -35,3 +35,8 @@ function(download_win_ffmpeg script_var) set(${script_var} "${FFMPEG_DOWNLOAD_DIR}/ffmpeg_version.cmake" PARENT_SCOPE) endif() endfunction() + +if(OPENCV_INSTALL_FFMPEG_DOWNLOAD_SCRIPT) + configure_file("${CMAKE_CURRENT_LIST_DIR}/ffmpeg-download.ps1.in" "${CMAKE_BINARY_DIR}/win-install/ffmpeg-download.ps1" @ONLY) + install(FILES "${CMAKE_BINARY_DIR}/win-install/ffmpeg-download.ps1" DESTINATION "." 
COMPONENT libs) +endif() From 95360a553903f59379275adeb6137bd7a0e68f7a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 9 Jun 2018 13:59:21 +0300 Subject: [PATCH 28/33] apps: add Win32 friendly opencv_version app Improve experience of launching app from explorer: - application just flash (open/close) the console window Suggested Win32 application flavor additionally shows MessageBox and waits for User interaction. --- apps/version/CMakeLists.txt | 33 ++++++++++++++++------ apps/version/opencv_version.cpp | 50 ++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/apps/version/CMakeLists.txt b/apps/version/CMakeLists.txt index 6ced527181..cc4abb33aa 100644 --- a/apps/version/CMakeLists.txt +++ b/apps/version/CMakeLists.txt @@ -1,19 +1,13 @@ -SET(OPENCV_APPLICATION_DEPS opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_videoio) +set(OPENCV_APPLICATION_DEPS opencv_core) ocv_check_dependencies(${OPENCV_APPLICATION_DEPS}) - if(NOT OCV_DEPENDENCIES_FOUND) return() endif() project(opencv_version) set(the_target opencv_version) - -ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv") ocv_target_include_modules_recurse(${the_target} ${OPENCV_APPLICATION_DEPS}) - -file(GLOB SRCS *.cpp) - -ocv_add_executable(${the_target} ${SRCS}) +ocv_add_executable(${the_target} opencv_version.cpp) ocv_target_link_libraries(${the_target} ${OPENCV_APPLICATION_DEPS}) set_target_properties(${the_target} PROPERTIES @@ -30,3 +24,26 @@ if(INSTALL_CREATE_DISTRIB) else() install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs) endif() + +if(WIN32) + project(opencv_version_win32) + set(the_target opencv_version_win32) + ocv_target_include_modules_recurse(${the_target} ${OPENCV_APPLICATION_DEPS}) + ocv_add_executable(${the_target} opencv_version.cpp) + ocv_target_link_libraries(${the_target} ${OPENCV_APPLICATION_DEPS}) + target_compile_definitions(${the_target} PRIVATE "OPENCV_WIN32_API=1") + set_target_properties(${the_target} PROPERTIES + DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" + RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH} + OUTPUT_NAME "opencv_version_win32") + + set_target_properties(${the_target} PROPERTIES FOLDER "applications") + + if(INSTALL_CREATE_DISTRIB) + if(BUILD_SHARED_LIBS) + install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT libs) + endif() + else() + install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs) + endif() +endif() diff --git a/apps/version/opencv_version.cpp b/apps/version/opencv_version.cpp index 04aa25917f..8034f698c5 100644 --- a/apps/version/opencv_version.cpp +++ b/apps/version/opencv_version.cpp @@ -9,6 +9,31 @@ #include +#ifdef OPENCV_WIN32_API +#define WIN32_LEAN_AND_MEAN +#include +#endif + +static void dumpHWFeatures(bool showAll = false) +{ + std::cout << "OpenCV's HW features list:" << std::endl; + int count = 0; + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) + { + cv::String name = cv::getHardwareFeatureName(i); + if (name.empty()) + continue; + bool enabled = cv::checkHardwareSupport(i); + if (enabled) + count++; + if (enabled || showAll) + { + printf(" ID=%3d (%s) -> %s\n", i, name.c_str(), enabled ? 
"ON" : "N/A"); + } + } + std::cout << "Total available: " << count << std::endl; +} + int main(int argc, const char** argv) { CV_TRACE_FUNCTION(); @@ -16,6 +41,7 @@ int main(int argc, const char** argv) CV_TRACE_ARG_VALUE(argv0, "argv0", argv[0]); CV_TRACE_ARG_VALUE(argv1, "argv1", argv[1]); +#ifndef OPENCV_WIN32_API cv::CommandLineParser parser(argc, argv, "{ help h usage ? | | show this help message }" "{ verbose v | | show build configuration log }" @@ -45,24 +71,14 @@ int main(int argc, const char** argv) if (parser.has("hw")) { - bool showAll = parser.get("hw"); - std::cout << "OpenCV's HW features list:" << std::endl; - int count = 0; - for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) - { - cv::String name = cv::getHardwareFeatureName(i); - if (name.empty()) - continue; - bool enabled = cv::checkHardwareSupport(i); - if (enabled) - count++; - if (enabled || showAll) - { - printf(" ID=%3d (%s) -> %s\n", i, name.c_str(), enabled ? "ON" : "N/A"); - } - } - std::cout << "Total available: " << count << std::endl; + dumpHWFeatures(parser.get("hw")); } +#else + std::cout << cv::getBuildInformation().c_str() << std::endl; + cv::dumpOpenCLInformation(); + dumpHWFeatures(); + MessageBoxA(NULL, "Check console window output", "OpenCV(" CV_VERSION ")", MB_ICONINFORMATION | MB_OK); +#endif return 0; } From 2c291bc2fba1273092de4b6feb5c71ee58271cae Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sat, 9 Jun 2018 15:37:04 +0300 Subject: [PATCH 29/33] Enable FastNeuralStyle and OpenFace networks with IE backend --- modules/dnn/perf/perf_net.cpp | 12 +++- modules/dnn/src/layers/convolution_layer.cpp | 7 ++- modules/dnn/src/layers/elementwise_layers.cpp | 56 ++++++++++++++++++- modules/dnn/src/layers/resize_layer.cpp | 6 +- modules/dnn/src/layers/slice_layer.cpp | 30 ++++++++++ modules/dnn/test/test_backends.cpp | 17 +++++- modules/dnn/test/test_darknet_importer.cpp | 2 - modules/dnn/test/test_torch_importer.cpp | 1 + 8 files changed, 119 insertions(+), 12 deletions(-) diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index e8569dcf10..16138cb99f 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -144,7 +144,8 @@ PERF_TEST_P_(DNNTestNetwork, SSD) PERF_TEST_P_(DNNTestNetwork, OpenFace) { if (backend == DNN_BACKEND_HALIDE || - backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)) throw SkipTestException(""); processNet("dnn/openface_nn4.small2.v1.t7", "", "", Mat(cv::Size(96, 96), CV_32FC3)); @@ -248,6 +249,15 @@ PERF_TEST_P_(DNNTestNetwork, EAST_text_detection) processNet("dnn/frozen_east_text_detection.pb", "", "", Mat(cv::Size(320, 320), CV_32FC3)); } +PERF_TEST_P_(DNNTestNetwork, FastNeuralStyle_eccv16) +{ + if (backend == DNN_BACKEND_HALIDE || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)) + throw SkipTestException(""); + processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", "", Mat(cv::Size(320, 240), CV_32FC3)); +} + const tuple testCases[] = { #ifdef HAVE_HALIDE tuple(DNN_BACKEND_HALIDE, DNN_TARGET_CPU), diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 5eac3e90f3..27818e5e7c 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -81,9 +81,10 
@@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_HALIDE && haveHalide() || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + return preferableTarget != DNN_TARGET_MYRIAD || type != "Deconvolution" || adjustPad == Size(); + else + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; } void finalize(const std::vector &inputs, std::vector &outputs) CV_OVERRIDE diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 801916d9c4..c95bdcd509 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -115,9 +115,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_HALIDE && haveHalide() || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); + return func.supportBackend(backendId, this->preferableTarget); } virtual Ptr tryAttach(const Ptr& node) CV_OVERRIDE @@ -238,6 +236,12 @@ struct ReLUFunctor explicit ReLUFunctor(float slope_=1.f) : slope(slope_) {} + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { float s = slope; @@ -353,6 +357,12 @@ struct ReLU6Functor CV_Assert(minValue <= maxValue); } + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) @@ -445,6 +455,12 @@ struct TanHFunctor { typedef TanHLayer Layer; + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) @@ -509,6 +525,12 @@ struct SigmoidFunctor { typedef SigmoidLayer Layer; + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) @@ -575,6 +597,11 @@ struct ELUFunctor explicit ELUFunctor() {} + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) @@ -638,6 +665,11 @@ struct AbsValFunctor { typedef AbsLayer Layer; + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += 
planeSize ) @@ -701,6 +733,11 @@ struct BNLLFunctor { typedef BNLLLayer Layer; + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) @@ -751,6 +788,14 @@ struct PowerFunctor explicit PowerFunctor(float power_ = 1.f, float scale_ = 1.f, float shift_ = 0.f) : power(power_), scale(scale_), shift(shift_) {} + bool supportBackend(int backendId, int targetId) + { + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + return (targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) || power == 1.0; + else + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { float a = scale, b = shift, p = power; @@ -853,6 +898,11 @@ struct ChannelsPReLUFunctor scale_umat = scale.getUMat(ACCESS_READ); } + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + } + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const { CV_Assert(scale.isContinuous() && scale.type() == CV_32F); diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 82bc6542be..26aa311c25 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -53,8 +53,10 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && interpolation == "nearest"; + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + return interpolation == "nearest" && preferableTarget != DNN_TARGET_MYRIAD; + else + return backendId == DNN_BACKEND_OPENCV; } virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 4b3a975b2a..f6f4109c61 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -41,6 +41,7 @@ //M*/ #include "../precomp.hpp" +#include "../op_inf_engine.hpp" #include "layers_common.hpp" #include @@ -107,6 +108,12 @@ public: } } + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_INFERENCE_ENGINE && sliceRanges.size() == 1; + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -247,6 +254,29 @@ public: inpMat(sliceRanges[i]).copyTo(outputs[i]); } } + + virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE + { +#ifdef HAVE_INF_ENGINE + InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); + InferenceEngine::LayerParams lp; + lp.name = name; + lp.type = "Crop"; + lp.precision = InferenceEngine::Precision::FP32; + std::shared_ptr ieLayer(new InferenceEngine::CropLayer(lp)); + + CV_Assert(sliceRanges.size() == 1); + for (int i = sliceRanges[0].size() - 1; i >= 0; --i) + { + ieLayer->axis.push_back(i); + ieLayer->offset.push_back(sliceRanges[0][i].start); + ieLayer->dim.push_back(sliceRanges[0][i].end - sliceRanges[0][i].start); + } + return Ptr(new InfEngineBackendNode(ieLayer)); + +#endif // HAVE_INF_ENGINE + return Ptr(); + } }; Ptr SliceLayer::create(const 
LayerParams& params) diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index f6563cb3cb..2549d7d352 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -256,7 +256,8 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) TEST_P(DNNTestNetwork, OpenFace) { if (backend == DNN_BACKEND_HALIDE || - backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)) throw SkipTestException(""); processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), ""); } @@ -296,6 +297,20 @@ TEST_P(DNNTestNetwork, DenseNet_121) processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "caffe"); } +TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) +{ + if (backend == DNN_BACKEND_HALIDE || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)) + throw SkipTestException(""); + Mat img = imread(findDataFile("dnn/googlenet_1.png", false)); + Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false); + // Output image has values in range [-143.526, 148.539]. + float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.3 : 4e-5; + float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7.0 : 2e-3; + processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf); +} + const tuple testCases[] = { #ifdef HAVE_HALIDE tuple(DNN_BACKEND_HALIDE, DNN_TARGET_CPU), diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 3b6ca2a9b8..aaa7ef5c11 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -135,8 +135,6 @@ TEST_P(Test_Darknet_nets, YoloVoc) { int backendId = get<0>(GetParam()); int targetId = get<1>(GetParam()); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD) - throw SkipTestException(""); std::vector outNames(1, "detection_out"); std::vector classIds(3); diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index cbff3cae37..a8c1d15503 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -296,6 +296,7 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false); net.setInput(inputBlob); + net.setPreferableBackend(DNN_BACKEND_OPENCV); Mat out = net.forward(); // Deprocessing. From 0d249c7448ae6fdcbaf35f0684140952e562d227 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 9 Jun 2018 17:38:32 +0300 Subject: [PATCH 30/33] videoio(msmf): replace custom debug print function --- modules/videoio/src/cap_msmf.cpp | 48 ++++++-------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index a0c82ed435..d57062eeb2 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -101,38 +101,6 @@ struct IMFAttributes; namespace { -#ifdef _DEBUG -void DPOprintOut(const wchar_t *format, ...) 
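The hand-rolled printer removed in this hunk is superseded by OpenCV's stock logging macro, which the rest of the hunk switches to. A minimal sketch of the replacement pattern is shown below; the header path and the stream-style message syntax are stated from general knowledge of the 3.4-era logging utilities, not taken from this patch, so treat them as assumptions.

    #include <opencv2/core/utils/logger.hpp>   // assumed location of the CV_LOG_* macros

    void reportGrab(int streamIndex)
    {
        // Tag may be NULL; the message is assembled with stream syntax and is
        // only emitted when the active log level includes DEBUG.
        CV_LOG_DEBUG(NULL, "videoio(MSMF): grabbed sample from stream " << streamIndex);
    }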
-{ - int i = 0; - wchar_t *p = NULL; - va_list args; - va_start(args, format); - if (::IsDebuggerPresent()) - { - WCHAR szMsg[512]; - ::StringCchVPrintfW(szMsg, sizeof(szMsg) / sizeof(szMsg[0]), format, args); - ::OutputDebugStringW(szMsg); - } - else - { - if (wcscmp(format, L"%i")) - { - i = va_arg(args, int); - } - if (wcscmp(format, L"%s")) - { - p = va_arg(args, wchar_t *); - } - wprintf(format, i, p); - } - va_end(args); -} -#define DebugPrintOut(...) DPOprintOut(__VA_ARGS__) -#else -#define DebugPrintOut(...) void() -#endif - template class ComPtr { @@ -1074,7 +1042,7 @@ bool CvCapture_MSMF::grabFrame() break; if (flags & MF_SOURCE_READERF_STREAMTICK) { - DebugPrintOut(L"\tStream tick detected. Retrying to grab the frame\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream tick detected. Retrying to grab the frame"); } } @@ -1082,38 +1050,38 @@ bool CvCapture_MSMF::grabFrame() { if (streamIndex != dwStreamIndex) { - DebugPrintOut(L"\tWrong stream readed. Abort capturing\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): Wrong stream readed. Abort capturing"); close(); } else if (flags & MF_SOURCE_READERF_ERROR) { - DebugPrintOut(L"\tStream reading error. Abort capturing\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream reading error. Abort capturing"); close(); } else if (flags & MF_SOURCE_READERF_ALLEFFECTSREMOVED) { - DebugPrintOut(L"\tStream decoding error. Abort capturing\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream decoding error. Abort capturing"); close(); } else if (flags & MF_SOURCE_READERF_ENDOFSTREAM) { sampleTime += frameStep; - DebugPrintOut(L"\tEnd of stream detected\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): End of stream detected"); } else { sampleTime += frameStep; if (flags & MF_SOURCE_READERF_NEWSTREAM) { - DebugPrintOut(L"\tNew stream detected\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): New stream detected"); } if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED) { - DebugPrintOut(L"\tStream native media type changed\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream native media type changed"); } if (flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED) { - DebugPrintOut(L"\tStream current media type changed\n"); + CV_LOG_DEBUG(NULL, "videoio(MSMF): Stream current media type changed"); } return true; } From 7d727ac2fb527814af477210ac05237168812ae2 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sat, 9 Jun 2018 18:06:53 +0300 Subject: [PATCH 31/33] Fuse top layers to batch normalization --- modules/dnn/src/layers/batch_norm_layer.cpp | 40 +++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index d42face4ec..3b472328c8 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -96,6 +96,46 @@ public: shift = bias_; } + virtual bool tryFuse(Ptr& top) CV_OVERRIDE + { + Mat w, b; + top->getScaleShift(w, b); + if (w.empty() && b.empty()) + return false; + + const int numChannels = weights_.total(); + const int numFusedWeights = w.total(); + const int numFusedBias = b.total(); + + if ((numFusedWeights != numChannels && numFusedWeights != 1 && !w.empty()) || + (numFusedBias != numChannels && numFusedBias != 1 && !b.empty())) + return false; + + if (!w.empty()) + { + w = w.reshape(1, 1); + if (numFusedWeights == 1) + { + multiply(weights_, w.at(0), weights_); + multiply(bias_, w.at(0), bias_); + } + else + { + multiply(weights_, w, weights_); + multiply(bias_, w, bias_); + } + } + if (!b.empty()) + { + b = b.reshape(1, 1); + if 
(numFusedBias == 1) + add(bias_, b.at(0), bias_); + else + add(bias_, b.reshape(1, 1), bias_); + } + return true; + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, From a11ef2650eaee498759e046e3bf02ae4ec5c106a Mon Sep 17 00:00:00 2001 From: catree Date: Sun, 10 Jun 2018 23:57:11 +0200 Subject: [PATCH 32/33] Add Java and Python code for the following imgproc tutorials: Finding contours in your image, Convex Hull, Creating Bounding boxes and circles for contours, Creating Bounding rotated boxes and ellipses for contours, Image Moments, Point Polygon Test. --- .../bounding_rects_circles.markdown | 164 +++++++++++++--- .../bounding_rotated_ellipses.markdown | 14 ++ .../find_contours/find_contours.markdown | 14 ++ .../shapedescriptors/hull/hull.markdown | 15 +- .../shapedescriptors/moments/moments.markdown | 14 ++ .../point_polygon_test.markdown | 14 ++ .../imgproc/table_of_content_imgproc.markdown | 12 ++ .../ShapeDescriptors/findContours_demo.cpp | 81 ++++---- .../generalContours_demo1.cpp | 159 ++++++++-------- .../generalContours_demo2.cpp | 110 +++++------ .../ShapeDescriptors/hull_demo.cpp | 89 +++++---- .../ShapeDescriptors/moments_demo.cpp | 138 +++++++------- .../pointPolygonTest_demo.cpp | 105 +++++----- .../GeneralContoursDemo1.java | 179 ++++++++++++++++++ .../GeneralContoursDemo2.java | 176 +++++++++++++++++ .../find_contours/FindContoursDemo.java | 137 ++++++++++++++ .../ShapeDescriptors/hull/HullDemo.java | 154 +++++++++++++++ .../ShapeDescriptors/moments/MomentsDemo.java | 178 +++++++++++++++++ .../PointPolygonTestDemo.java | 93 +++++++++ .../generalContours_demo1.py | 82 ++++++++ .../generalContours_demo2.py | 82 ++++++++ .../find_contours/findContours_demo.py | 50 +++++ .../ShapeDescriptors/hull/hull_demo.py | 57 ++++++ .../ShapeDescriptors/moments/moments_demo.py | 83 ++++++++ .../pointPolygonTest_demo.py | 51 +++++ 25 files changed, 1884 insertions(+), 367 deletions(-) create mode 100644 samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java create mode 100644 samples/java/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/GeneralContoursDemo2.java create mode 100644 samples/java/tutorial_code/ShapeDescriptors/find_contours/FindContoursDemo.java create mode 100644 samples/java/tutorial_code/ShapeDescriptors/hull/HullDemo.java create mode 100644 samples/java/tutorial_code/ShapeDescriptors/moments/MomentsDemo.java create mode 100644 samples/java/tutorial_code/ShapeDescriptors/point_polygon_test/PointPolygonTestDemo.java create mode 100644 samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py create mode 100644 samples/python/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/generalContours_demo2.py create mode 100644 samples/python/tutorial_code/ShapeDescriptors/find_contours/findContours_demo.py create mode 100644 samples/python/tutorial_code/ShapeDescriptors/hull/hull_demo.py create mode 100644 samples/python/tutorial_code/ShapeDescriptors/moments/moments_demo.py create mode 100644 samples/python/tutorial_code/ShapeDescriptors/point_polygon_test/pointPolygonTest_demo.py diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown index 978b900ab2..99cc2c146e 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown +++ 
b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown @@ -15,55 +15,167 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. You can also download it from [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp) @include samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java) +@include samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py) +@include samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py +@end_toggle Explanation ----------- The main function is rather simple, as follows from the comments we do the following: --# Open the image, convert it into grayscale and blur it to get rid of the noise. - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp setup --# Create a window with header "Source" and display the source file in it. - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp createWindow --# Create a trackbar on the source_window and assign a callback function to it +- Open the image, convert it into grayscale and blur it to get rid of the noise. + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp setup +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java setup +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py setup +@end_toggle + +- Create a window with header "Source" and display the source file in it. + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp createWindow +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java createWindow +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py createWindow +@end_toggle + +- Create a trackbar on the `source_window` and assign a callback function to it. In general callback functions are used to react to some kind of signal, in our case it's trackbar's state change. - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp taskbar --# Explicit one-time call of `thresh_callback` is necessary to display + Explicit one-time call of `thresh_callback` is necessary to display the "Contours" window simultaniously with the "Source" window. - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp callback00 --# Wait for user to close the windows. 
- @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp waitForIt +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp trackbar +@end_toggle -The callback function `thresh_callback` does all the interesting job. +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java trackbar +@end_toggle +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py trackbar +@end_toggle --# Writes to `threshold_output` the threshold of the grayscale picture (you can check out about thresholding @ref tutorial_threshold "here"). - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp threshold --# Finds contours and saves them to the vectors `contour` and `hierarchy`. - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp findContours --# For every found contour we now apply approximation to polygons - with accuracy +-3 and stating that the curve must me closed. +The callback function does all the interesting job. - After that we find a bounding rect for every polygon and save it to `boundRect`. +- Use @ref cv::Canny to detect edges in the images. + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp Canny +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java Canny +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py Canny +@end_toggle + +- Finds contours and saves them to the vectors `contour` and `hierarchy`. + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp findContours +@end_toggle +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java findContours +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py findContours +@end_toggle + +- For every found contour we now apply approximation to polygons + with accuracy +-3 and stating that the curve must be closed. + After that we find a bounding rect for every polygon and save it to `boundRect`. At last we find a minimum enclosing circle for every polygon and save it to `center` and `radius` vectors. - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp allthework + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp allthework +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java allthework +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py allthework +@end_toggle We found everything we need, all we have to do is to draw. --# Create new Mat of unsigned 8-bit chars, filled with zeros. +- Create new Mat of unsigned 8-bit chars, filled with zeros. It will contain all the drawings we are going to make (rects and circles). 
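For readers following the explanation without the full sample at hand, a condensed, self-contained sketch of the pipeline these bullets describe -- Canny edges, findContours, approxPolyDP, boundingRect and minEnclosingCircle, then drawing -- could look as follows. This is illustrative only: the input file name and thresholds are arbitrary choices, and the authoritative tutorial sources remain the generalContours_demo1 files referenced by the snippets.

    #include <opencv2/imgproc.hpp>
    #include <opencv2/highgui.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <vector>

    int main()
    {
        cv::Mat src = cv::imread("stuff.jpg"), gray, edges;   // example input path
        if (src.empty()) return -1;
        cv::cvtColor(src, gray, cv::COLOR_BGR2GRAY);
        cv::blur(gray, gray, cv::Size(3, 3));
        cv::Canny(gray, edges, 100, 200);                      // thresh and thresh*2

        std::vector<std::vector<cv::Point> > contours;
        cv::findContours(edges, contours, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE);

        cv::Mat drawing = cv::Mat::zeros(edges.size(), CV_8UC3);
        for (size_t i = 0; i < contours.size(); i++)
        {
            std::vector<cv::Point> poly;
            cv::approxPolyDP(contours[i], poly, 3, true);      // polygon approximation, accuracy +-3
            cv::Rect box = cv::boundingRect(poly);             // axis-aligned bounding rectangle
            cv::Point2f center; float radius;
            cv::minEnclosingCircle(poly, center, radius);      // minimal enclosing circle

            cv::Scalar color(0, 255, 0);
            cv::drawContours(drawing, std::vector<std::vector<cv::Point> >(1, poly), 0, color);
            cv::rectangle(drawing, box.tl(), box.br(), color, 2);
            cv::circle(drawing, center, (int)radius, color, 2);
        }
        cv::imshow("Contours", drawing);
        cv::waitKey();
        return 0;
    }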
- @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp zeroMat --# For every contour: pick a random color, draw the contour, the bounding rectangle and - the minimal enclosing circle with it, - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp forContour --# Display the results: create a new window "Contours" and show everything we added to drawings on it. - @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp showDrawings + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp zeroMat +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java zeroMat +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py zeroMat +@end_toggle + +- For every contour: pick a random color, draw the contour, the bounding rectangle and + the minimal enclosing circle with it. + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp forContour +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java forContour +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py forContour +@end_toggle + +- Display the results: create a new window "Contours" and show everything we added to drawings on it. + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp showDrawings +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java showDrawings +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py showDrawings +@end_toggle Result ------ diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown index eb21cf5bc7..a482936d79 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown @@ -15,9 +15,23 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. You can also download it from [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp) @include samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/GeneralContoursDemo2.java) +@include samples/java/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/GeneralContoursDemo2.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. 
You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/generalContours_demo2.py) +@include samples/python/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/generalContours_demo2.py +@end_toggle Explanation ----------- diff --git a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown index af467bdc72..c1171bb2aa 100644 --- a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown @@ -15,9 +15,23 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. You can also download it from [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp) @include samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/ShapeDescriptors/find_contours/FindContoursDemo.java) +@include samples/java/tutorial_code/ShapeDescriptors/find_contours/FindContoursDemo.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/ShapeDescriptors/find_contours/findContours_demo.py) +@include samples/python/tutorial_code/ShapeDescriptors/find_contours/findContours_demo.py +@end_toggle Explanation ----------- diff --git a/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown b/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown index cfb9241b81..7d7fae1b7c 100644 --- a/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown @@ -14,10 +14,23 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. You can also download it from [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp) - @include samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/ShapeDescriptors/hull/HullDemo.java) +@include samples/java/tutorial_code/ShapeDescriptors/hull/HullDemo.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/ShapeDescriptors/hull/hull_demo.py) +@include samples/python/tutorial_code/ShapeDescriptors/hull/hull_demo.py +@end_toggle Explanation ----------- diff --git a/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown b/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown index 231ff37500..de9e79ca86 100644 --- a/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown @@ -16,9 +16,23 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. 
You can also download it from [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp) @include samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/ShapeDescriptors/moments/MomentsDemo.java) +@include samples/java/tutorial_code/ShapeDescriptors/moments/MomentsDemo.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/ShapeDescriptors/moments/moments_demo.py) +@include samples/python/tutorial_code/ShapeDescriptors/moments/moments_demo.py +@end_toggle Explanation ----------- diff --git a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown index 1f50410616..4a42eea836 100644 --- a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown @@ -14,9 +14,23 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. You can also download it from [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp) @include samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/java/tutorial_code/ShapeDescriptors/point_polygon_test/PointPolygonTestDemo.java) +@include samples/java/tutorial_code/ShapeDescriptors/point_polygon_test/PointPolygonTestDemo.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. 
You can also download it from +[here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/ShapeDescriptors/point_polygon_test/pointPolygonTest_demo.py) +@include samples/python/tutorial_code/ShapeDescriptors/point_polygon_test/pointPolygonTest_demo.py +@end_toggle Explanation ----------- diff --git a/doc/tutorials/imgproc/table_of_content_imgproc.markdown b/doc/tutorials/imgproc/table_of_content_imgproc.markdown index fc93ae0e3d..e3fac55924 100644 --- a/doc/tutorials/imgproc/table_of_content_imgproc.markdown +++ b/doc/tutorials/imgproc/table_of_content_imgproc.markdown @@ -225,6 +225,8 @@ In this section you will learn about the image processing (manipulation) functio - @subpage tutorial_find_contours + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Ana Huamán @@ -233,6 +235,8 @@ In this section you will learn about the image processing (manipulation) functio - @subpage tutorial_hull + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Ana Huamán @@ -241,6 +245,8 @@ In this section you will learn about the image processing (manipulation) functio - @subpage tutorial_bounding_rects_circles + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Ana Huamán @@ -249,6 +255,8 @@ In this section you will learn about the image processing (manipulation) functio - @subpage tutorial_bounding_rotated_ellipses + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Ana Huamán @@ -257,6 +265,8 @@ In this section you will learn about the image processing (manipulation) functio - @subpage tutorial_moments + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Ana Huamán @@ -265,6 +275,8 @@ In this section you will learn about the image processing (manipulation) functio - @subpage tutorial_point_polygon_test + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Ana Huamán diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp index 233800e901..1083657ead 100644 --- a/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp +++ b/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp @@ -12,9 +12,8 @@ using namespace cv; using namespace std; -Mat src; Mat src_gray; +Mat src_gray; int thresh = 100; -int max_thresh = 255; RNG rng(12345); /// Function header @@ -25,34 +24,31 @@ void thresh_callback(int, void* ); */ int main( int argc, char** argv ) { - /// Load source image - String imageName("../data/happyfish.jpg"); // by default - if (argc > 1) - { - imageName = argv[1]; - } - src = imread(imageName, IMREAD_COLOR); + /// Load source image + CommandLineParser parser( argc, argv, "{@input | ../data/HappyFish.jpg | input image}" ); + Mat src = imread( parser.get( "@input" ) ); + if( src.empty() ) + { + cout << "Could not open or find the image!\n" << endl; + cout << "Usage: " << argv[0] << " " << endl; + return -1; + } - if (src.empty()) - { - cerr << "No image supplied ..." 
<< endl; - return -1; - } + /// Convert image to gray and blur it + cvtColor( src, src_gray, COLOR_BGR2GRAY ); + blur( src_gray, src_gray, Size(3,3) ); - /// Convert image to gray and blur it - cvtColor( src, src_gray, COLOR_BGR2GRAY ); - blur( src_gray, src_gray, Size(3,3) ); + /// Create Window + const char* source_window = "Source"; + namedWindow( source_window ); + imshow( source_window, src ); - /// Create Window - const char* source_window = "Source"; - namedWindow( source_window, WINDOW_AUTOSIZE ); - imshow( source_window, src ); + const int max_thresh = 255; + createTrackbar( "Canny thresh:", source_window, &thresh, max_thresh, thresh_callback ); + thresh_callback( 0, 0 ); - createTrackbar( " Canny thresh:", "Source", &thresh, max_thresh, thresh_callback ); - thresh_callback( 0, 0 ); - - waitKey(0); - return(0); + waitKey(); + return 0; } /** @@ -60,24 +56,23 @@ int main( int argc, char** argv ) */ void thresh_callback(int, void* ) { - Mat canny_output; - vector > contours; - vector hierarchy; + /// Detect edges using Canny + Mat canny_output; + Canny( src_gray, canny_output, thresh, thresh*2 ); - /// Detect edges using canny - Canny( src_gray, canny_output, thresh, thresh*2, 3 ); - /// Find contours - findContours( canny_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) ); + /// Find contours + vector > contours; + vector hierarchy; + findContours( canny_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE ); - /// Draw contours - Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); - for( size_t i = 0; i< contours.size(); i++ ) - { - Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, (int)i, color, 2, 8, hierarchy, 0, Point() ); - } + /// Draw contours + Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); + for( size_t i = 0; i< contours.size(); i++ ) + { + Scalar color = Scalar( rng.uniform(0, 256), rng.uniform(0,256), rng.uniform(0,256) ); + drawContours( drawing, contours, (int)i, color, 2, LINE_8, hierarchy, 0 ); + } - /// Show in a window - namedWindow( "Contours", WINDOW_AUTOSIZE ); - imshow( "Contours", drawing ); + /// Show in a window + imshow( "Contours", drawing ); } diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp index ea0b2a47ef..f8eb194378 100644 --- a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp +++ b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp @@ -12,9 +12,8 @@ using namespace cv; using namespace std; -Mat src; Mat src_gray; +Mat src_gray; int thresh = 100; -int max_thresh = 255; RNG rng(12345); /// Function header @@ -25,42 +24,37 @@ void thresh_callback(int, void* ); */ int main( int argc, char** argv ) { - //![setup] - /// Load source image - CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); - src = imread( parser.get( "@input" ), IMREAD_COLOR ); - if( src.empty() ) + //! 
[setup] + /// Load source image + CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); + Mat src = imread( parser.get( "@input" ) ); + if( src.empty() ) { - cout << "Could not open or find the image!\n" << endl; - cout << "usage: " << argv[0] << " " << endl; - return -1; + cout << "Could not open or find the image!\n" << endl; + cout << "usage: " << argv[0] << " " << endl; + return -1; } - /// Convert image to gray and blur it - cvtColor( src, src_gray, COLOR_BGR2GRAY ); - blur( src_gray, src_gray, Size(3,3) ); - //![setup] - - //![createWindow] - /// Create Window - const char* source_window = "Source"; - namedWindow( source_window, WINDOW_AUTOSIZE ); - imshow( source_window, src ); - //![createWindow] - - //![taskbar] - createTrackbar( " Threshold:", "Source", &thresh, max_thresh, thresh_callback ); - //![taskbar] - - //![callback00] - thresh_callback( 0, 0 ); - //![callback00] - - //![waitForIt] - waitKey(0); - //![waitForIt] - - return(0); + /// Convert image to gray and blur it + cvtColor( src, src_gray, COLOR_BGR2GRAY ); + blur( src_gray, src_gray, Size(3,3) ); + //! [setup] + + //! [createWindow] + /// Create Window + const char* source_window = "Source"; + namedWindow( source_window ); + imshow( source_window, src ); + //! [createWindow] + + //! [trackbar] + const int max_thresh = 255; + createTrackbar( "Canny thresh:", source_window, &thresh, max_thresh, thresh_callback ); + thresh_callback( 0, 0 ); + //! [trackbar] + + waitKey(); + return 0; } /** @@ -68,53 +62,50 @@ int main( int argc, char** argv ) */ void thresh_callback(int, void* ) { - Mat threshold_output; - vector > contours; - vector hierarchy; - - //![threshold] - /// Detect edges using Threshold - threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY ); - //![threshold] - - //![findContours] - /// Find contours - findContours( threshold_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) ); - //![findContours] - - /// Approximate contours to polygons + get bounding rects and circles - vector > contours_poly( contours.size() ); - vector boundRect( contours.size() ); - vectorcenter( contours.size() ); - vectorradius( contours.size() ); - - //![allthework] - for( size_t i = 0; i < contours.size(); i++ ) - { - approxPolyDP( contours[i], contours_poly[i], 3, true ); - boundRect[i] = boundingRect( contours_poly[i] ); - minEnclosingCircle( contours_poly[i], center[i], radius[i] ); - } - //![allthework] - - //![zeroMat] - /// Draw polygonal contour + bonding rects + circles - Mat drawing = Mat::zeros( threshold_output.size(), CV_8UC3 ); - //![zeroMat] - - //![forContour] - for( size_t i = 0; i< contours.size(); i++ ) - { - Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours_poly, (int)i, color, 1, 8, vector(), 0, Point() ); - rectangle( drawing, boundRect[i].tl(), boundRect[i].br(), color, 2, 8, 0 ); - circle( drawing, center[i], (int)radius[i], color, 2, 8, 0 ); - } - //![forContour] - - //![showDrawings] - /// Show in a window - namedWindow( "Contours", WINDOW_AUTOSIZE ); - imshow( "Contours", drawing ); - //![showDrawings] + //! [Canny] + /// Detect edges using Canny + Mat canny_output; + Canny( src_gray, canny_output, thresh, thresh*2 ); + //! [Canny] + + //! [findContours] + /// Find contours + vector > contours; + findContours( canny_output, contours, RETR_TREE, CHAIN_APPROX_SIMPLE ); + //! [findContours] + + //! 
[allthework] + /// Approximate contours to polygons + get bounding rects and circles + vector > contours_poly( contours.size() ); + vector boundRect( contours.size() ); + vectorcenters( contours.size() ); + vectorradius( contours.size() ); + + for( size_t i = 0; i < contours.size(); i++ ) + { + approxPolyDP( contours[i], contours_poly[i], 3, true ); + boundRect[i] = boundingRect( contours_poly[i] ); + minEnclosingCircle( contours_poly[i], centers[i], radius[i] ); + } + //! [allthework] + + //! [zeroMat] + Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); + //! [zeroMat] + + //! [forContour] + /// Draw polygonal contour + bonding rects + circles + for( size_t i = 0; i< contours.size(); i++ ) + { + Scalar color = Scalar( rng.uniform(0, 256), rng.uniform(0,256), rng.uniform(0,256) ); + drawContours( drawing, contours_poly, (int)i, color ); + rectangle( drawing, boundRect[i].tl(), boundRect[i].br(), color, 2 ); + circle( drawing, centers[i], (int)radius[i], color, 2 ); + } + //! [forContour] + + //! [showDrawings] + /// Show in a window + imshow( "Contours", drawing ); + //! [showDrawings] } diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp index 169f8bf4e5..2018b64bb2 100644 --- a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp +++ b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp @@ -12,9 +12,8 @@ using namespace cv; using namespace std; -Mat src; Mat src_gray; +Mat src_gray; int thresh = 100; -int max_thresh = 255; RNG rng(12345); /// Function header @@ -25,30 +24,31 @@ void thresh_callback(int, void* ); */ int main( int argc, char** argv ) { - /// Load source image and convert it to gray - CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); - src = imread( parser.get( "@input" ), IMREAD_COLOR ); - if( src.empty() ) + /// Load source image and convert it to gray + CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); + Mat src = imread( parser.get( "@input" ) ); + if( src.empty() ) { - cout << "Could not open or find the image!\n" << endl; - cout << "Usage: " << argv[0] << " " << endl; - return -1; + cout << "Could not open or find the image!\n" << endl; + cout << "Usage: " << argv[0] << " " << endl; + return -1; } - /// Convert image to gray and blur it - cvtColor( src, src_gray, COLOR_BGR2GRAY ); - blur( src_gray, src_gray, Size(3,3) ); + /// Convert image to gray and blur it + cvtColor( src, src_gray, COLOR_BGR2GRAY ); + blur( src_gray, src_gray, Size(3,3) ); - /// Create Window - const char* source_window = "Source"; - namedWindow( source_window, WINDOW_AUTOSIZE ); - imshow( source_window, src ); + /// Create Window + const char* source_window = "Source"; + namedWindow( source_window ); + imshow( source_window, src ); - createTrackbar( " Threshold:", "Source", &thresh, max_thresh, thresh_callback ); - thresh_callback( 0, 0 ); + const int max_thresh = 255; + createTrackbar( "Canny thresh:", source_window, &thresh, max_thresh, thresh_callback ); + thresh_callback( 0, 0 ); - waitKey(0); - return(0); + waitKey(); + return 0; } /** @@ -56,41 +56,43 @@ int main( int argc, char** argv ) */ void thresh_callback(int, void* ) { - Mat threshold_output; - vector > contours; - vector hierarchy; + /// Detect edges using Canny + Mat canny_output; + Canny( src_gray, canny_output, thresh, thresh*2 ); + /// Find contours + vector > contours; + findContours( canny_output, contours, 
RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) ); - /// Detect edges using Threshold - threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY ); - /// Find contours - findContours( threshold_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) ); - - /// Find the rotated rectangles and ellipses for each contour - vector minRect( contours.size() ); - vector minEllipse( contours.size() ); - - for( size_t i = 0; i < contours.size(); i++ ) - { minRect[i] = minAreaRect( contours[i] ); - if( contours[i].size() > 5 ) - { minEllipse[i] = fitEllipse( contours[i] ); } - } + /// Find the rotated rectangles and ellipses for each contour + vector minRect( contours.size() ); + vector minEllipse( contours.size() ); + for( size_t i = 0; i < contours.size(); i++ ) + { + minRect[i] = minAreaRect( contours[i] ); + if( contours[i].size() > 5 ) + { + minEllipse[i] = fitEllipse( contours[i] ); + } + } - /// Draw contours + rotated rects + ellipses - Mat drawing = Mat::zeros( threshold_output.size(), CV_8UC3 ); - for( size_t i = 0; i< contours.size(); i++ ) - { - Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - // contour - drawContours( drawing, contours, (int)i, color, 1, 8, vector(), 0, Point() ); - // ellipse - ellipse( drawing, minEllipse[i], color, 2, 8 ); - // rotated rectangle - Point2f rect_points[4]; minRect[i].points( rect_points ); - for( int j = 0; j < 4; j++ ) - line( drawing, rect_points[j], rect_points[(j+1)%4], color, 1, 8 ); - } + /// Draw contours + rotated rects + ellipses + Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); + for( size_t i = 0; i< contours.size(); i++ ) + { + Scalar color = Scalar( rng.uniform(0, 256), rng.uniform(0,256), rng.uniform(0,256) ); + // contour + drawContours( drawing, contours, (int)i, color ); + // ellipse + ellipse( drawing, minEllipse[i], color, 2 ); + // rotated rectangle + Point2f rect_points[4]; + minRect[i].points( rect_points ); + for ( int j = 0; j < 4; j++ ) + { + line( drawing, rect_points[j], rect_points[(j+1)%4], color ); + } + } - /// Show in a window - namedWindow( "Contours", WINDOW_AUTOSIZE ); - imshow( "Contours", drawing ); + /// Show in a window + imshow( "Contours", drawing ); } diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp index c1559088ba..6640286feb 100644 --- a/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp +++ b/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp @@ -12,9 +12,8 @@ using namespace cv; using namespace std; -Mat src; Mat src_gray; +Mat src_gray; int thresh = 100; -int max_thresh = 255; RNG rng(12345); /// Function header @@ -25,30 +24,31 @@ void thresh_callback(int, void* ); */ int main( int argc, char** argv ) { - /// Load source image and convert it to gray - CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); - src = imread( parser.get( "@input" ), IMREAD_COLOR ); - if( src.empty() ) - { - cout << "Could not open or find the image!\n" << endl; - cout << "Usage: " << argv[0] << " " << endl; - return -1; - } + /// Load source image and convert it to gray + CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); + Mat src = imread( parser.get( "@input" ) ); + if( src.empty() ) + { + cout << "Could not open or find the image!\n" << endl; + cout << "Usage: " << argv[0] << " " << endl; + return -1; + } - /// Convert image to gray and blur it - cvtColor( src, src_gray, COLOR_BGR2GRAY ); - blur( 
src_gray, src_gray, Size(3,3) ); + /// Convert image to gray and blur it + cvtColor( src, src_gray, COLOR_BGR2GRAY ); + blur( src_gray, src_gray, Size(3,3) ); - /// Create Window - const char* source_window = "Source"; - namedWindow( source_window, WINDOW_AUTOSIZE ); - imshow( source_window, src ); + /// Create Window + const char* source_window = "Source"; + namedWindow( source_window ); + imshow( source_window, src ); - createTrackbar( " Threshold:", "Source", &thresh, max_thresh, thresh_callback ); - thresh_callback( 0, 0 ); + const int max_thresh = 255; + createTrackbar( "Canny thresh:", source_window, &thresh, max_thresh, thresh_callback ); + thresh_callback( 0, 0 ); - waitKey(0); - return(0); + waitKey(); + return 0; } /** @@ -56,31 +56,30 @@ int main( int argc, char** argv ) */ void thresh_callback(int, void* ) { - Mat threshold_output; - vector > contours; - vector hierarchy; + /// Detect edges using Canny + Mat canny_output; + Canny( src_gray, canny_output, thresh, thresh*2 ); - /// Detect edges using Threshold - threshold( src_gray, threshold_output, thresh, 255, THRESH_BINARY ); + /// Find contours + vector > contours; + findContours( canny_output, contours, RETR_TREE, CHAIN_APPROX_SIMPLE ); - /// Find contours - findContours( threshold_output, contours, hierarchy, RETR_TREE, CHAIN_APPROX_SIMPLE, Point(0, 0) ); + /// Find the convex hull object for each contour + vector >hull( contours.size() ); + for( size_t i = 0; i < contours.size(); i++ ) + { + convexHull( contours[i], hull[i] ); + } - /// Find the convex hull object for each contour - vector >hull( contours.size() ); - for( size_t i = 0; i < contours.size(); i++ ) - { convexHull( contours[i], hull[i], false ); } + /// Draw contours + hull results + Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); + for( size_t i = 0; i< contours.size(); i++ ) + { + Scalar color = Scalar( rng.uniform(0, 256), rng.uniform(0,256), rng.uniform(0,256) ); + drawContours( drawing, contours, (int)i, color ); + drawContours( drawing, hull, (int)i, color ); + } - /// Draw contours + hull results - Mat drawing = Mat::zeros( threshold_output.size(), CV_8UC3 ); - for( size_t i = 0; i< contours.size(); i++ ) - { - Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, (int)i, color, 1, 8, vector(), 0, Point() ); - drawContours( drawing, hull, (int)i, color, 1, 8, vector(), 0, Point() ); - } - - /// Show in a window - namedWindow( "Hull demo", WINDOW_AUTOSIZE ); - imshow( "Hull demo", drawing ); + /// Show in a window + imshow( "Hull demo", drawing ); } diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp index 6741cc61b3..eaccd14e83 100644 --- a/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp +++ b/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp @@ -8,13 +8,13 @@ #include "opencv2/highgui.hpp" #include "opencv2/imgproc.hpp" #include +#include using namespace cv; using namespace std; -Mat src; Mat src_gray; +Mat src_gray; int thresh = 100; -int max_thresh = 255; RNG rng(12345); /// Function header @@ -25,31 +25,32 @@ void thresh_callback(int, void* ); */ int main( int argc, char** argv ) { - /// Load source image and convert it to gray - CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); - src = imread( parser.get( "@input" ), IMREAD_COLOR ); - - if( src.empty() ) - { - cout << "Could not open or find the image!\n" << endl; - cout << "usage: " 
<< argv[0] << " " << endl; - exit(0); - } - - /// Convert image to gray and blur it - cvtColor( src, src_gray, COLOR_BGR2GRAY ); - blur( src_gray, src_gray, Size(3,3) ); - - /// Create Window - const char* source_window = "Source"; - namedWindow( source_window, WINDOW_AUTOSIZE ); - imshow( source_window, src ); - - createTrackbar( " Canny thresh:", "Source", &thresh, max_thresh, thresh_callback ); - thresh_callback( 0, 0 ); - - waitKey(0); - return(0); + /// Load source image + CommandLineParser parser( argc, argv, "{@input | ../data/stuff.jpg | input image}" ); + Mat src = imread( parser.get( "@input" ) ); + + if( src.empty() ) + { + cout << "Could not open or find the image!\n" << endl; + cout << "usage: " << argv[0] << " " << endl; + return -1; + } + + /// Convert image to gray and blur it + cvtColor( src, src_gray, COLOR_BGR2GRAY ); + blur( src_gray, src_gray, Size(3,3) ); + + /// Create Window + const char* source_window = "Source"; + namedWindow( source_window ); + imshow( source_window, src ); + + const int max_thresh = 255; + createTrackbar( "Canny thresh:", source_window, &thresh, max_thresh, thresh_callback ); + thresh_callback( 0, 0 ); + + waitKey(); + return 0; } /** @@ -57,44 +58,47 @@ int main( int argc, char** argv ) */ void thresh_callback(int, void* ) { - Mat canny_output; - vector > contours; - - /// Detect edges using canny - Canny( src_gray, canny_output, thresh, thresh*2, 3 ); - /// Find contours - findContours( canny_output, contours, RETR_TREE, CHAIN_APPROX_SIMPLE ); - - /// Get the moments - vector mu(contours.size() ); - for( size_t i = 0; i < contours.size(); i++ ) - { mu[i] = moments( contours[i], false ); } - - /// Get the mass centers: - vector mc( contours.size() ); - for( size_t i = 0; i < contours.size(); i++ ) - { mc[i] = Point2f( static_cast(mu[i].m10/mu[i].m00) , static_cast(mu[i].m01/mu[i].m00) ); } - - /// Draw contours - Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); - for( size_t i = 0; i< contours.size(); i++ ) - { - Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, (int)i, color, 2, LINE_8 ); - circle( drawing, mc[i], 4, color, -1, 8, 0 ); - } - - /// Show in a window - namedWindow( "Contours", WINDOW_AUTOSIZE ); - imshow( "Contours", drawing ); - - /// Calculate the area with the moments 00 and compare with the result of the OpenCV function - printf("\t Info: Area and Contour Length \n"); - for( size_t i = 0; i< contours.size(); i++ ) - { - printf(" * Contour[%d] - Area (M_00) = %.2f - Area OpenCV: %.2f - Length: %.2f \n", (int)i, mu[i].m00, contourArea(contours[i]), arcLength( contours[i], true ) ); - Scalar color = Scalar( rng.uniform(0, 255), rng.uniform(0,255), rng.uniform(0,255) ); - drawContours( drawing, contours, (int)i, color, 2, LINE_8 ); - circle( drawing, mc[i], 4, color, -1, 8, 0 ); - } + /// Detect edges using canny + Mat canny_output; + Canny( src_gray, canny_output, thresh, thresh*2, 3 ); + /// Find contours + vector > contours; + findContours( canny_output, contours, RETR_TREE, CHAIN_APPROX_SIMPLE ); + + /// Get the moments + vector mu(contours.size() ); + for( size_t i = 0; i < contours.size(); i++ ) + { + mu[i] = moments( contours[i] ); + } + + /// Get the mass centers + vector mc( contours.size() ); + for( size_t i = 0; i < contours.size(); i++ ) + { + //add 1e-5 to avoid division by zero + mc[i] = Point2f( static_cast(mu[i].m10 / (mu[i].m00 + 1e-5)), + static_cast(mu[i].m01 / (mu[i].m00 + 1e-5)) ); + cout << "mc[" << i << "]=" << mc[i] << endl; + 
} + + /// Draw contours + Mat drawing = Mat::zeros( canny_output.size(), CV_8UC3 ); + for( size_t i = 0; i< contours.size(); i++ ) + { + Scalar color = Scalar( rng.uniform(0, 256), rng.uniform(0,256), rng.uniform(0,256) ); + drawContours( drawing, contours, (int)i, color, 2 ); + circle( drawing, mc[i], 4, color, -1 ); + } + + /// Show in a window + imshow( "Contours", drawing ); + + /// Calculate the area with the moments 00 and compare with the result of the OpenCV function + cout << "\t Info: Area and Contour Length \n"; + for( size_t i = 0; i < contours.size(); i++ ) + { + cout << " * Contour[" << i << "] - Area (M_00) = " << std::fixed << std::setprecision(2) << mu[i].m00 + << " - Area OpenCV: " << contourArea(contours[i]) << " - Length: " << arcLength( contours[i], true ) << endl; + } } diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp index efc481a5b2..da3feedc15 100644 --- a/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp +++ b/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp @@ -16,60 +16,71 @@ using namespace std; */ int main( void ) { - /// Create an image - const int r = 100; - Mat src = Mat::zeros( Size( 4*r, 4*r ), CV_8UC1 ); + /// Create an image + const int r = 100; + Mat src = Mat::zeros( Size( 4*r, 4*r ), CV_8U ); - /// Create a sequence of points to make a contour: - vector vert(6); + /// Create a sequence of points to make a contour + vector vert(6); + vert[0] = Point( 3*r/2, static_cast(1.34*r) ); + vert[1] = Point( 1*r, 2*r ); + vert[2] = Point( 3*r/2, static_cast(2.866*r) ); + vert[3] = Point( 5*r/2, static_cast(2.866*r) ); + vert[4] = Point( 3*r, 2*r ); + vert[5] = Point( 5*r/2, static_cast(1.34*r) ); - vert[0] = Point( 3*r/2, static_cast(1.34*r) ); - vert[1] = Point( 1*r, 2*r ); - vert[2] = Point( 3*r/2, static_cast(2.866*r) ); - vert[3] = Point( 5*r/2, static_cast(2.866*r) ); - vert[4] = Point( 3*r, 2*r ); - vert[5] = Point( 5*r/2, static_cast(1.34*r) ); + /// Draw it in src + for( int i = 0; i < 6; i++ ) + { + line( src, vert[i], vert[(i+1)%6], Scalar( 255 ), 3 ); + } - /// Draw it in src - for( int j = 0; j < 6; j++ ) - { line( src, vert[j], vert[(j+1)%6], Scalar( 255 ), 3, 8 ); } + /// Get the contours + vector > contours; + findContours( src, contours, RETR_TREE, CHAIN_APPROX_SIMPLE); - /// Get the contours - vector > contours; + /// Calculate the distances to the contour + Mat raw_dist( src.size(), CV_32F ); + for( int i = 0; i < src.rows; i++ ) + { + for( int j = 0; j < src.cols; j++ ) + { + raw_dist.at(i,j) = (float)pointPolygonTest( contours[0], Point2f((float)j, (float)i), true ); + } + } - findContours( src, contours, RETR_TREE, CHAIN_APPROX_SIMPLE); + double minVal, maxVal; + minMaxLoc( raw_dist, &minVal, &maxVal ); + minVal = abs(minVal); + maxVal = abs(maxVal); - /// Calculate the distances to the contour - Mat raw_dist( src.size(), CV_32FC1 ); - - for( int j = 0; j < src.rows; j++ ) - { for( int i = 0; i < src.cols; i++ ) - { raw_dist.at(j,i) = (float)pointPolygonTest( contours[0], Point2f((float)i,(float)j), true ); } - } - - double minVal; double maxVal; - minMaxLoc( raw_dist, &minVal, &maxVal, 0, 0, Mat() ); - minVal = abs(minVal); maxVal = abs(maxVal); - - /// Depicting the distances graphically - Mat drawing = Mat::zeros( src.size(), CV_8UC3 ); - - for( int j = 0; j < src.rows; j++ ) - { for( int i = 0; i < src.cols; i++ ) - { - if( raw_dist.at(j,i) < 0 ) - { drawing.at(j,i)[0] = (uchar)(255 - 
abs(raw_dist.at(j,i))*255/minVal); } - else if( raw_dist.at(j,i) > 0 ) - { drawing.at(j,i)[2] = (uchar)(255 - raw_dist.at(j,i)*255/maxVal); } + /// Depicting the distances graphically + Mat drawing = Mat::zeros( src.size(), CV_8UC3 ); + for( int i = 0; i < src.rows; i++ ) + { + for( int j = 0; j < src.cols; j++ ) + { + if( raw_dist.at(i,j) < 0 ) + { + drawing.at(i,j)[0] = (uchar)(255 - abs(raw_dist.at(i,j)) * 255 / minVal); + } + else if( raw_dist.at(i,j) > 0 ) + { + drawing.at(i,j)[2] = (uchar)(255 - raw_dist.at(i,j) * 255 / maxVal); + } else - { drawing.at(j,i)[0] = 255; drawing.at(j,i)[1] = 255; drawing.at(j,i)[2] = 255; } - } - } + { + drawing.at(i,j)[0] = 255; + drawing.at(i,j)[1] = 255; + drawing.at(i,j)[2] = 255; + } + } + } - /// Show your results - imshow( "Source", src ); - imshow( "Distance", drawing ); + /// Show your results + imshow( "Source", src ); + imshow( "Distance", drawing ); - waitKey(0); - return(0); + waitKey(); + return 0; } diff --git a/samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java b/samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java new file mode 100644 index 0000000000..85bbf45e5f --- /dev/null +++ b/samples/java/tutorial_code/ShapeDescriptors/bounding_rects_circles/GeneralContoursDemo1.java @@ -0,0 +1,179 @@ +import java.awt.BorderLayout; +import java.awt.Container; +import java.awt.Image; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import javax.swing.BoxLayout; +import javax.swing.ImageIcon; +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JSlider; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfPoint; +import org.opencv.core.MatOfPoint2f; +import org.opencv.core.Point; +import org.opencv.core.Rect; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.highgui.HighGui; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; + +class GeneralContours1 { + private Mat srcGray = new Mat(); + private JFrame frame; + private JLabel imgSrcLabel; + private JLabel imgContoursLabel; + private static final int MAX_THRESHOLD = 255; + private int threshold = 100; + private Random rng = new Random(12345); + + public GeneralContours1(String[] args) { + //! [setup] + /// Load source image + String filename = args.length > 0 ? args[0] : "../data/stuff.jpg"; + Mat src = Imgcodecs.imread(filename); + if (src.empty()) { + System.err.println("Cannot read image: " + filename); + System.exit(0); + } + + /// Convert image to gray and blur it + Imgproc.cvtColor(src, srcGray, Imgproc.COLOR_BGR2GRAY); + Imgproc.blur(srcGray, srcGray, new Size(3, 3)); + //! [setup] + + //! [createWindow] + // Create and set up the window. + frame = new JFrame("Creating Bounding boxes and circles for contours demo"); + frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + // Set up the content pane. + Image img = HighGui.toBufferedImage(src); + addComponentsToPane(frame.getContentPane(), img); + //! [createWindow] + // Use the content pane's default BorderLayout. No need for + // setLayout(new BorderLayout()); + // Display the window. 
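+ // pack() sizes the frame to fit its contents; update() then runs a first contour pass with the initial threshold.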
+ frame.pack(); + frame.setVisible(true); + update(); + } + + private void addComponentsToPane(Container pane, Image img) { + if (!(pane.getLayout() instanceof BorderLayout)) { + pane.add(new JLabel("Container doesn't use BorderLayout!")); + return; + } + + JPanel sliderPanel = new JPanel(); + sliderPanel.setLayout(new BoxLayout(sliderPanel, BoxLayout.PAGE_AXIS)); + + //! [trackbar] + sliderPanel.add(new JLabel("Canny threshold: ")); + JSlider slider = new JSlider(0, MAX_THRESHOLD, threshold); + slider.setMajorTickSpacing(20); + slider.setMinorTickSpacing(10); + slider.setPaintTicks(true); + slider.setPaintLabels(true); + slider.addChangeListener(new ChangeListener() { + @Override + public void stateChanged(ChangeEvent e) { + JSlider source = (JSlider) e.getSource(); + threshold = source.getValue(); + update(); + } + }); + //! [trackbar] + sliderPanel.add(slider); + pane.add(sliderPanel, BorderLayout.PAGE_START); + + JPanel imgPanel = new JPanel(); + imgSrcLabel = new JLabel(new ImageIcon(img)); + imgPanel.add(imgSrcLabel); + + Mat blackImg = Mat.zeros(srcGray.size(), CvType.CV_8U); + imgContoursLabel = new JLabel(new ImageIcon(HighGui.toBufferedImage(blackImg))); + imgPanel.add(imgContoursLabel); + + pane.add(imgPanel, BorderLayout.CENTER); + } + + private void update() { + //! [Canny] + /// Detect edges using Canny + Mat cannyOutput = new Mat(); + Imgproc.Canny(srcGray, cannyOutput, threshold, threshold * 2); + //! [Canny] + + //! [findContours] + /// Find contours + List contours = new ArrayList<>(); + Mat hierarchy = new Mat(); + Imgproc.findContours(cannyOutput, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_SIMPLE); + //! [findContours] + + //! [allthework] + /// Approximate contours to polygons + get bounding rects and circles + MatOfPoint2f[] contoursPoly = new MatOfPoint2f[contours.size()]; + Rect[] boundRect = new Rect[contours.size()]; + Point[] centers = new Point[contours.size()]; + float[][] radius = new float[contours.size()][1]; + + for (int i = 0; i < contours.size(); i++) { + contoursPoly[i] = new MatOfPoint2f(); + Imgproc.approxPolyDP(new MatOfPoint2f(contours.get(i).toArray()), contoursPoly[i], 3, true); + boundRect[i] = Imgproc.boundingRect(new MatOfPoint(contoursPoly[i].toArray())); + centers[i] = new Point(); + Imgproc.minEnclosingCircle(contoursPoly[i], centers[i], radius[i]); + } + //! [allthework] + + //! [zeroMat] + Mat drawing = Mat.zeros(cannyOutput.size(), CvType.CV_8UC3); + //! [zeroMat] + //! [forContour] + /// Draw polygonal contour + bonding rects + circles + List contoursPolyList = new ArrayList<>(contoursPoly.length); + for (MatOfPoint2f poly : contoursPoly) { + contoursPolyList.add(new MatOfPoint(poly.toArray())); + } + + for (int i = 0; i < contours.size(); i++) { + Scalar color = new Scalar(rng.nextInt(256), rng.nextInt(256), rng.nextInt(256)); + Imgproc.drawContours(drawing, contoursPolyList, i, color); + Imgproc.rectangle(drawing, boundRect[i].tl(), boundRect[i].br(), color, 2); + Imgproc.circle(drawing, centers[i], (int) radius[i][0], color, 2); + } + //! [forContour] + + //! [showDrawings] + /// Show in a window + imgContoursLabel.setIcon(new ImageIcon(HighGui.toBufferedImage(drawing))); + frame.repaint(); + //! [showDrawings] + } +} + +public class GeneralContoursDemo1 { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + // Schedule a job for the event dispatch thread: + // creating and showing this application's GUI. 
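+ // Swing components are not thread-safe, so the GUI is created on the Event Dispatch Thread via invokeLater.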
+ javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override + public void run() { + new GeneralContours1(args); + } + }); + } +} diff --git a/samples/java/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/GeneralContoursDemo2.java b/samples/java/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/GeneralContoursDemo2.java new file mode 100644 index 0000000000..c7b13dd174 --- /dev/null +++ b/samples/java/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/GeneralContoursDemo2.java @@ -0,0 +1,176 @@ +import java.awt.BorderLayout; +import java.awt.Container; +import java.awt.Image; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import javax.swing.BoxLayout; +import javax.swing.ImageIcon; +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JSlider; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfPoint; +import org.opencv.core.MatOfPoint2f; +import org.opencv.core.Point; +import org.opencv.core.RotatedRect; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.highgui.HighGui; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; + +class GeneralContours2 { + private Mat srcGray = new Mat(); + private JFrame frame; + private JLabel imgSrcLabel; + private JLabel imgContoursLabel; + private static final int MAX_THRESHOLD = 255; + private int threshold = 100; + private Random rng = new Random(12345); + + public GeneralContours2(String[] args) { + //! [setup] + /// Load source image + String filename = args.length > 0 ? args[0] : "../data/stuff.jpg"; + Mat src = Imgcodecs.imread(filename); + if (src.empty()) { + System.err.println("Cannot read image: " + filename); + System.exit(0); + } + + /// Convert image to gray and blur it + Imgproc.cvtColor(src, srcGray, Imgproc.COLOR_BGR2GRAY); + Imgproc.blur(srcGray, srcGray, new Size(3, 3)); + //! [setup] + + //! [createWindow] + // Create and set up the window. + frame = new JFrame("Creating Bounding rotated boxes and ellipses for contours demo"); + frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + // Set up the content pane. + Image img = HighGui.toBufferedImage(src); + addComponentsToPane(frame.getContentPane(), img); + //! [createWindow] + // Use the content pane's default BorderLayout. No need for + // setLayout(new BorderLayout()); + // Display the window. + frame.pack(); + frame.setVisible(true); + update(); + } + + private void addComponentsToPane(Container pane, Image img) { + if (!(pane.getLayout() instanceof BorderLayout)) { + pane.add(new JLabel("Container doesn't use BorderLayout!")); + return; + } + + JPanel sliderPanel = new JPanel(); + sliderPanel.setLayout(new BoxLayout(sliderPanel, BoxLayout.PAGE_AXIS)); + + //! [trackbar] + sliderPanel.add(new JLabel("Canny threshold: ")); + JSlider slider = new JSlider(0, MAX_THRESHOLD, threshold); + slider.setMajorTickSpacing(20); + slider.setMinorTickSpacing(10); + slider.setPaintTicks(true); + slider.setPaintLabels(true); + slider.addChangeListener(new ChangeListener() { + @Override + public void stateChanged(ChangeEvent e) { + JSlider source = (JSlider) e.getSource(); + threshold = source.getValue(); + update(); + } + }); + //! 
[trackbar] + sliderPanel.add(slider); + pane.add(sliderPanel, BorderLayout.PAGE_START); + + JPanel imgPanel = new JPanel(); + imgSrcLabel = new JLabel(new ImageIcon(img)); + imgPanel.add(imgSrcLabel); + + Mat blackImg = Mat.zeros(srcGray.size(), CvType.CV_8U); + imgContoursLabel = new JLabel(new ImageIcon(HighGui.toBufferedImage(blackImg))); + imgPanel.add(imgContoursLabel); + + pane.add(imgPanel, BorderLayout.CENTER); + } + + private void update() { + //! [Canny] + /// Detect edges using Canny + Mat cannyOutput = new Mat(); + Imgproc.Canny(srcGray, cannyOutput, threshold, threshold * 2); + //! [Canny] + + //! [findContours] + /// Find contours + List contours = new ArrayList<>(); + Mat hierarchy = new Mat(); + Imgproc.findContours(cannyOutput, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_SIMPLE); + //! [findContours] + + /// Find the rotated rectangles and ellipses for each contour + RotatedRect[] minRect = new RotatedRect[contours.size()]; + RotatedRect[] minEllipse = new RotatedRect[contours.size()]; + for (int i = 0; i < contours.size(); i++) { + minRect[i] = Imgproc.minAreaRect(new MatOfPoint2f(contours.get(i).toArray())); + minEllipse[i] = new RotatedRect(); + if (contours.get(i).rows() > 5) { + minEllipse[i] = Imgproc.fitEllipse(new MatOfPoint2f(contours.get(i).toArray())); + } + } + + //! [zeroMat] + /// Draw contours + rotated rects + ellipses + Mat drawing = Mat.zeros(cannyOutput.size(), CvType.CV_8UC3); + //! [zeroMat] + //! [forContour] + for (int i = 0; i < contours.size(); i++) { + Scalar color = new Scalar(rng.nextInt(256), rng.nextInt(256), rng.nextInt(256)); + // contour + Imgproc.drawContours(drawing, contours, i, color); + // ellipse + Imgproc.ellipse(drawing, minEllipse[i], color, 2); + // rotated rectangle + Point[] rectPoints = new Point[4]; + minRect[i].points(rectPoints); + for (int j = 0; j < 4; j++) { + Imgproc.line(drawing, rectPoints[j], rectPoints[(j+1) % 4], color); + } + } + //! [forContour] + + //! [showDrawings] + /// Show in a window + imgContoursLabel.setIcon(new ImageIcon(HighGui.toBufferedImage(drawing))); + frame.repaint(); + //! [showDrawings] + } +} + +public class GeneralContoursDemo2 { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + // Schedule a job for the event dispatch thread: + // creating and showing this application's GUI. 
+ javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override + public void run() { + new GeneralContours2(args); + } + }); + } +} diff --git a/samples/java/tutorial_code/ShapeDescriptors/find_contours/FindContoursDemo.java b/samples/java/tutorial_code/ShapeDescriptors/find_contours/FindContoursDemo.java new file mode 100644 index 0000000000..5eec4f878a --- /dev/null +++ b/samples/java/tutorial_code/ShapeDescriptors/find_contours/FindContoursDemo.java @@ -0,0 +1,137 @@ +import java.awt.BorderLayout; +import java.awt.Container; +import java.awt.Image; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import javax.swing.BoxLayout; +import javax.swing.ImageIcon; +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JSlider; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfPoint; +import org.opencv.core.Point; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.highgui.HighGui; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; + +class FindContours { + private Mat srcGray = new Mat(); + private JFrame frame; + private JLabel imgSrcLabel; + private JLabel imgContoursLabel; + private static final int MAX_THRESHOLD = 255; + private int threshold = 100; + private Random rng = new Random(12345); + + public FindContours(String[] args) { + /// Load source image + String filename = args.length > 0 ? args[0] : "../data/HappyFish.jpg"; + Mat src = Imgcodecs.imread(filename); + if (src.empty()) { + System.err.println("Cannot read image: " + filename); + System.exit(0); + } + + /// Convert image to gray and blur it + Imgproc.cvtColor(src, srcGray, Imgproc.COLOR_BGR2GRAY); + Imgproc.blur(srcGray, srcGray, new Size(3, 3)); + + // Create and set up the window. + frame = new JFrame("Finding contours in your image demo"); + frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + // Set up the content pane. + Image img = HighGui.toBufferedImage(src); + addComponentsToPane(frame.getContentPane(), img); + // Use the content pane's default BorderLayout. No need for + // setLayout(new BorderLayout()); + // Display the window. 
+ frame.pack(); + frame.setVisible(true); + update(); + } + + private void addComponentsToPane(Container pane, Image img) { + if (!(pane.getLayout() instanceof BorderLayout)) { + pane.add(new JLabel("Container doesn't use BorderLayout!")); + return; + } + + JPanel sliderPanel = new JPanel(); + sliderPanel.setLayout(new BoxLayout(sliderPanel, BoxLayout.PAGE_AXIS)); + + sliderPanel.add(new JLabel("Canny threshold: ")); + JSlider slider = new JSlider(0, MAX_THRESHOLD, threshold); + slider.setMajorTickSpacing(20); + slider.setMinorTickSpacing(10); + slider.setPaintTicks(true); + slider.setPaintLabels(true); + slider.addChangeListener(new ChangeListener() { + @Override + public void stateChanged(ChangeEvent e) { + JSlider source = (JSlider) e.getSource(); + threshold = source.getValue(); + update(); + } + }); + sliderPanel.add(slider); + pane.add(sliderPanel, BorderLayout.PAGE_START); + + JPanel imgPanel = new JPanel(); + imgSrcLabel = new JLabel(new ImageIcon(img)); + imgPanel.add(imgSrcLabel); + + Mat blackImg = Mat.zeros(srcGray.size(), CvType.CV_8U); + imgContoursLabel = new JLabel(new ImageIcon(HighGui.toBufferedImage(blackImg))); + imgPanel.add(imgContoursLabel); + + pane.add(imgPanel, BorderLayout.CENTER); + } + + private void update() { + /// Detect edges using Canny + Mat cannyOutput = new Mat(); + Imgproc.Canny(srcGray, cannyOutput, threshold, threshold * 2); + + /// Find contours + List contours = new ArrayList<>(); + Mat hierarchy = new Mat(); + Imgproc.findContours(cannyOutput, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_SIMPLE); + + /// Draw contours + Mat drawing = Mat.zeros(cannyOutput.size(), CvType.CV_8UC3); + for (int i = 0; i < contours.size(); i++) { + Scalar color = new Scalar(rng.nextInt(256), rng.nextInt(256), rng.nextInt(256)); + Imgproc.drawContours(drawing, contours, i, color, 2, Core.LINE_8, hierarchy, 0, new Point()); + } + + imgContoursLabel.setIcon(new ImageIcon(HighGui.toBufferedImage(drawing))); + frame.repaint(); + } +} + +public class FindContoursDemo { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + // Schedule a job for the event dispatch thread: + // creating and showing this application's GUI. 
+ javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override + public void run() { + new FindContours(args); + } + }); + } +} diff --git a/samples/java/tutorial_code/ShapeDescriptors/hull/HullDemo.java b/samples/java/tutorial_code/ShapeDescriptors/hull/HullDemo.java new file mode 100644 index 0000000000..0e2104fb2b --- /dev/null +++ b/samples/java/tutorial_code/ShapeDescriptors/hull/HullDemo.java @@ -0,0 +1,154 @@ +import java.awt.BorderLayout; +import java.awt.Container; +import java.awt.Image; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import javax.swing.BoxLayout; +import javax.swing.ImageIcon; +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JSlider; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfInt; +import org.opencv.core.MatOfPoint; +import org.opencv.core.Point; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.highgui.HighGui; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; + +class Hull { + private Mat srcGray = new Mat(); + private JFrame frame; + private JLabel imgSrcLabel; + private JLabel imgContoursLabel; + private static final int MAX_THRESHOLD = 255; + private int threshold = 100; + private Random rng = new Random(12345); + + public Hull(String[] args) { + /// Load source image + String filename = args.length > 0 ? args[0] : "../data/stuff.jpg"; + Mat src = Imgcodecs.imread(filename); + if (src.empty()) { + System.err.println("Cannot read image: " + filename); + System.exit(0); + } + + /// Convert image to gray and blur it + Imgproc.cvtColor(src, srcGray, Imgproc.COLOR_BGR2GRAY); + Imgproc.blur(srcGray, srcGray, new Size(3, 3)); + + // Create and set up the window. + frame = new JFrame("Convex Hull demo"); + frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + // Set up the content pane. + Image img = HighGui.toBufferedImage(src); + addComponentsToPane(frame.getContentPane(), img); + // Use the content pane's default BorderLayout. No need for + // setLayout(new BorderLayout()); + // Display the window. 
+ frame.pack(); + frame.setVisible(true); + update(); + } + + private void addComponentsToPane(Container pane, Image img) { + if (!(pane.getLayout() instanceof BorderLayout)) { + pane.add(new JLabel("Container doesn't use BorderLayout!")); + return; + } + + JPanel sliderPanel = new JPanel(); + sliderPanel.setLayout(new BoxLayout(sliderPanel, BoxLayout.PAGE_AXIS)); + + sliderPanel.add(new JLabel("Canny threshold: ")); + JSlider slider = new JSlider(0, MAX_THRESHOLD, threshold); + slider.setMajorTickSpacing(20); + slider.setMinorTickSpacing(10); + slider.setPaintTicks(true); + slider.setPaintLabels(true); + slider.addChangeListener(new ChangeListener() { + @Override + public void stateChanged(ChangeEvent e) { + JSlider source = (JSlider) e.getSource(); + threshold = source.getValue(); + update(); + } + }); + sliderPanel.add(slider); + pane.add(sliderPanel, BorderLayout.PAGE_START); + + JPanel imgPanel = new JPanel(); + imgSrcLabel = new JLabel(new ImageIcon(img)); + imgPanel.add(imgSrcLabel); + + Mat blackImg = Mat.zeros(srcGray.size(), CvType.CV_8U); + imgContoursLabel = new JLabel(new ImageIcon(HighGui.toBufferedImage(blackImg))); + imgPanel.add(imgContoursLabel); + + pane.add(imgPanel, BorderLayout.CENTER); + } + + private void update() { + /// Detect edges using Canny + Mat cannyOutput = new Mat(); + Imgproc.Canny(srcGray, cannyOutput, threshold, threshold * 2); + + /// Find contours + List contours = new ArrayList<>(); + Mat hierarchy = new Mat(); + Imgproc.findContours(cannyOutput, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_SIMPLE); + + /// Find the convex hull object for each contour + List hullList = new ArrayList<>(); + for (MatOfPoint contour : contours) { + MatOfInt hull = new MatOfInt(); + Imgproc.convexHull(contour, hull); + + Point[] contourArray = contour.toArray(); + Point[] hullPoints = new Point[hull.rows()]; + List hullContourIdxList = hull.toList(); + for (int i = 0; i < hullContourIdxList.size(); i++) { + hullPoints[i] = contourArray[hullContourIdxList.get(i)]; + } + hullList.add(new MatOfPoint(hullPoints)); + } + + /// Draw contours + hull results + Mat drawing = Mat.zeros(cannyOutput.size(), CvType.CV_8UC3); + for (int i = 0; i < contours.size(); i++) { + Scalar color = new Scalar(rng.nextInt(256), rng.nextInt(256), rng.nextInt(256)); + Imgproc.drawContours(drawing, contours, i, color); + Imgproc.drawContours(drawing, hullList, i, color ); + } + + imgContoursLabel.setIcon(new ImageIcon(HighGui.toBufferedImage(drawing))); + frame.repaint(); + } +} + +public class HullDemo { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + // Schedule a job for the event dispatch thread: + // creating and showing this application's GUI. 
+ javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override + public void run() { + new Hull(args); + } + }); + } +} diff --git a/samples/java/tutorial_code/ShapeDescriptors/moments/MomentsDemo.java b/samples/java/tutorial_code/ShapeDescriptors/moments/MomentsDemo.java new file mode 100644 index 0000000000..ffc24207fb --- /dev/null +++ b/samples/java/tutorial_code/ShapeDescriptors/moments/MomentsDemo.java @@ -0,0 +1,178 @@ +import java.awt.BorderLayout; +import java.awt.Container; +import java.awt.Image; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import javax.swing.BoxLayout; +import javax.swing.ImageIcon; +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JSlider; +import javax.swing.event.ChangeEvent; +import javax.swing.event.ChangeListener; + +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfPoint; +import org.opencv.core.MatOfPoint2f; +import org.opencv.core.Point; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.highgui.HighGui; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; +import org.opencv.imgproc.Moments; + +class MomentsClass { + private Mat srcGray = new Mat(); + private JFrame frame; + private JLabel imgSrcLabel; + private JLabel imgContoursLabel; + private static final int MAX_THRESHOLD = 255; + private int threshold = 100; + private Random rng = new Random(12345); + + public MomentsClass(String[] args) { + //! [setup] + /// Load source image + String filename = args.length > 0 ? args[0] : "../data/stuff.jpg"; + Mat src = Imgcodecs.imread(filename); + if (src.empty()) { + System.err.println("Cannot read image: " + filename); + System.exit(0); + } + + /// Convert image to gray and blur it + Imgproc.cvtColor(src, srcGray, Imgproc.COLOR_BGR2GRAY); + Imgproc.blur(srcGray, srcGray, new Size(3, 3)); + //! [setup] + + //! [createWindow] + // Create and set up the window. + frame = new JFrame("Image Moments demo"); + frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + // Set up the content pane. + Image img = HighGui.toBufferedImage(src); + addComponentsToPane(frame.getContentPane(), img); + //! [createWindow] + // Use the content pane's default BorderLayout. No need for + // setLayout(new BorderLayout()); + // Display the window. + frame.pack(); + frame.setVisible(true); + update(); + } + + private void addComponentsToPane(Container pane, Image img) { + if (!(pane.getLayout() instanceof BorderLayout)) { + pane.add(new JLabel("Container doesn't use BorderLayout!")); + return; + } + + JPanel sliderPanel = new JPanel(); + sliderPanel.setLayout(new BoxLayout(sliderPanel, BoxLayout.PAGE_AXIS)); + + //! [trackbar] + sliderPanel.add(new JLabel("Canny threshold: ")); + JSlider slider = new JSlider(0, MAX_THRESHOLD, threshold); + slider.setMajorTickSpacing(20); + slider.setMinorTickSpacing(10); + slider.setPaintTicks(true); + slider.setPaintLabels(true); + slider.addChangeListener(new ChangeListener() { + @Override + public void stateChanged(ChangeEvent e) { + JSlider source = (JSlider) e.getSource(); + threshold = source.getValue(); + update(); + } + }); + //! 
[trackbar] + sliderPanel.add(slider); + pane.add(sliderPanel, BorderLayout.PAGE_START); + + JPanel imgPanel = new JPanel(); + imgSrcLabel = new JLabel(new ImageIcon(img)); + imgPanel.add(imgSrcLabel); + + Mat blackImg = Mat.zeros(srcGray.size(), CvType.CV_8U); + imgContoursLabel = new JLabel(new ImageIcon(HighGui.toBufferedImage(blackImg))); + imgPanel.add(imgContoursLabel); + + pane.add(imgPanel, BorderLayout.CENTER); + } + + private void update() { + //! [Canny] + /// Detect edges using Canny + Mat cannyOutput = new Mat(); + Imgproc.Canny(srcGray, cannyOutput, threshold, threshold * 2); + //! [Canny] + + //! [findContours] + /// Find contours + List contours = new ArrayList<>(); + Mat hierarchy = new Mat(); + Imgproc.findContours(cannyOutput, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_SIMPLE); + //! [findContours] + + /// Get the moments + List mu = new ArrayList<>(contours.size()); + for (int i = 0; i < contours.size(); i++) { + mu.add(Imgproc.moments(contours.get(i))); + } + + /// Get the mass centers + List mc = new ArrayList<>(contours.size()); + for (int i = 0; i < contours.size(); i++) { + //add 1e-5 to avoid division by zero + mc.add(new Point(mu.get(i).m10 / (mu.get(i).m00 + 1e-5), mu.get(i).m01 / (mu.get(i).m00 + 1e-5))); + } + + //! [zeroMat] + /// Draw contours + Mat drawing = Mat.zeros(cannyOutput.size(), CvType.CV_8UC3); + //! [zeroMat] + //! [forContour] + for (int i = 0; i < contours.size(); i++) { + Scalar color = new Scalar(rng.nextInt(256), rng.nextInt(256), rng.nextInt(256)); + Imgproc.drawContours(drawing, contours, i, color, 2); + Imgproc.circle(drawing, mc.get(i), 4, color, -1); + } + //! [forContour] + + //! [showDrawings] + /// Show in a window + imgContoursLabel.setIcon(new ImageIcon(HighGui.toBufferedImage(drawing))); + frame.repaint(); + //! [showDrawings] + + /// Calculate the area with the moments 00 and compare with the result of the OpenCV function + System.out.println("\t Info: Area and Contour Length \n"); + for (int i = 0; i < contours.size(); i++) { + System.out.format(" * Contour[%d] - Area (M_00) = %.2f - Area OpenCV: %.2f - Length: %.2f\n", i, + mu.get(i).m00, Imgproc.contourArea(contours.get(i)), + Imgproc.arcLength(new MatOfPoint2f(contours.get(i).toArray()), true)); + } + } +} + +public class MomentsDemo { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + // Schedule a job for the event dispatch thread: + // creating and showing this application's GUI. 
+ javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override + public void run() { + new MomentsClass(args); + } + }); + } +} diff --git a/samples/java/tutorial_code/ShapeDescriptors/point_polygon_test/PointPolygonTestDemo.java b/samples/java/tutorial_code/ShapeDescriptors/point_polygon_test/PointPolygonTestDemo.java new file mode 100644 index 0000000000..2d0a542db7 --- /dev/null +++ b/samples/java/tutorial_code/ShapeDescriptors/point_polygon_test/PointPolygonTestDemo.java @@ -0,0 +1,93 @@ +import java.util.ArrayList; +import java.util.List; + +import org.opencv.core.Core; +import org.opencv.core.Core.MinMaxLocResult; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfPoint; +import org.opencv.core.MatOfPoint2f; +import org.opencv.core.Point; +import org.opencv.core.Scalar; +import org.opencv.core.Size; +import org.opencv.highgui.HighGui; +import org.opencv.imgproc.Imgproc; + +class PointPolygonTest { + public void run() { + /// Create an image + int r = 100; + Mat src = Mat.zeros(new Size(4 * r, 4 * r), CvType.CV_8U); + + /// Create a sequence of points to make a contour + List vert = new ArrayList<>(6); + vert.add(new Point(3 * r / 2, 1.34 * r)); + vert.add(new Point(1 * r, 2 * r)); + vert.add(new Point(3 * r / 2, 2.866 * r)); + vert.add(new Point(5 * r / 2, 2.866 * r)); + vert.add(new Point(3 * r, 2 * r)); + vert.add(new Point(5 * r / 2, 1.34 * r)); + + /// Draw it in src + for (int i = 0; i < 6; i++) { + Imgproc.line(src, vert.get(i), vert.get((i + 1) % 6), new Scalar(255), 3); + } + + /// Get the contours + List contours = new ArrayList<>(); + Mat hierarchy = new Mat(); + Imgproc.findContours(src, contours, hierarchy, Imgproc.RETR_TREE, Imgproc.CHAIN_APPROX_SIMPLE); + + /// Calculate the distances to the contour + Mat rawDist = new Mat(src.size(), CvType.CV_32F); + float[] rawDistData = new float[(int) (rawDist.total() * rawDist.channels())]; + for (int i = 0; i < src.rows(); i++) { + for (int j = 0; j < src.cols(); j++) { + rawDistData[i * src.cols() + j] = (float) Imgproc + .pointPolygonTest(new MatOfPoint2f(contours.get(0).toArray()), new Point(j, i), true); + } + } + rawDist.put(0, 0, rawDistData); + + MinMaxLocResult res = Core.minMaxLoc(rawDist); + double minVal = Math.abs(res.minVal); + double maxVal = Math.abs(res.maxVal); + + /// Depicting the distances graphically + Mat drawing = Mat.zeros(src.size(), CvType.CV_8UC3); + byte[] drawingData = new byte[(int) (drawing.total() * drawing.channels())]; + for (int i = 0; i < src.rows(); i++) { + for (int j = 0; j < src.cols(); j++) { + if (rawDistData[i * src.cols() + j] < 0) { + drawingData[(i * src.cols() + j) * 3] = + (byte) (255 - Math.abs(rawDistData[i * src.cols() + j]) * 255 / minVal); + } else if (rawDistData[i * src.cols() + j] > 0) { + drawingData[(i * src.cols() + j) * 3 + 2] = + (byte) (255 - rawDistData[i * src.cols() + j] * 255 / maxVal); + } else { + drawingData[(i * src.cols() + j) * 3] = (byte) 255; + drawingData[(i * src.cols() + j) * 3 + 1] = (byte) 255; + drawingData[(i * src.cols() + j) * 3 + 2] = (byte) 255; + } + } + } + drawing.put(0, 0, drawingData); + + /// Show your results + HighGui.imshow("Source", src); + HighGui.imshow("Distance", drawing); + + HighGui.waitKey(); + System.exit(0); + } +} + +public class PointPolygonTestDemo { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + new PointPolygonTest().run(); + } + +} diff --git 
a/samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py b/samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py new file mode 100644 index 0000000000..060167484c --- /dev/null +++ b/samples/python/tutorial_code/ShapeDescriptors/bounding_rects_circles/generalContours_demo1.py @@ -0,0 +1,82 @@ +from __future__ import print_function +import cv2 as cv +import numpy as np +import argparse +import random as rng + +rng.seed(12345) + +def thresh_callback(val): + threshold = val + + ## [Canny] + # Detect edges using Canny + canny_output = cv.Canny(src_gray, threshold, threshold * 2) + ## [Canny] + + ## [findContours] + # Find contours + _, contours, _ = cv.findContours(canny_output, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + ## [findContours] + + ## [allthework] + # Approximate contours to polygons + get bounding rects and circles + contours_poly = [None]*len(contours) + boundRect = [None]*len(contours) + centers = [None]*len(contours) + radius = [None]*len(contours) + for i in range(len(contours)): + contours_poly[i] = cv.approxPolyDP(contours[i], 3, True) + boundRect[i] = cv.boundingRect(contours_poly[i]) + centers[i], radius[i] = cv.minEnclosingCircle(contours_poly[i]) + ## [allthework] + + ## [zeroMat] + drawing = np.zeros((canny_output.shape[0], canny_output.shape[1], 3), dtype=np.uint8) + ## [zeroMat] + + ## [forContour] + # Draw polygonal contour + bonding rects + circles + for i in range(len(contours)): + color = (rng.randint(0,256), rng.randint(0,256), rng.randint(0,256)) + cv.drawContours(drawing, contours_poly, i, color) + cv.rectangle(drawing, (int(boundRect[i][0]), int(boundRect[i][1])), \ + (int(boundRect[i][0]+boundRect[i][2]), int(boundRect[i][1]+boundRect[i][3])), color, 2) + cv.circle(drawing, (int(centers[i][0]), int(centers[i][1])), int(radius[i]), color, 2) + ## [forContour] + + ## [showDrawings] + # Show in a window + cv.imshow('Contours', drawing) + ## [showDrawings] + +## [setup] +# Load source image +parser = argparse.ArgumentParser(description='Code for Creating Bounding boxes and circles for contours tutorial.') +parser.add_argument('--input', help='Path to input image.', default='../data/stuff.jpg') +args = parser.parse_args() + +src = cv.imread(args.input) +if src is None: + print('Could not open or find the image:', args.input) + exit(0) + +# Convert image to gray and blur it +src_gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY) +src_gray = cv.blur(src_gray, (3,3)) +## [setup] + +## [createWindow] +# Create Window +source_window = 'Source' +cv.namedWindow(source_window) +cv.imshow(source_window, src) +## [createWindow] +## [trackbar] +max_thresh = 255 +thresh = 100 # initial threshold +cv.createTrackbar('Canny thresh:', source_window, thresh, max_thresh, thresh_callback) +thresh_callback(thresh) +## [trackbar] + +cv.waitKey() diff --git a/samples/python/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/generalContours_demo2.py b/samples/python/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/generalContours_demo2.py new file mode 100644 index 0000000000..a461aba49b --- /dev/null +++ b/samples/python/tutorial_code/ShapeDescriptors/bounding_rotated_ellipses/generalContours_demo2.py @@ -0,0 +1,82 @@ +from __future__ import print_function +import cv2 as cv +import numpy as np +import argparse +import random as rng + +rng.seed(12345) + +def thresh_callback(val): + threshold = val + + ## [Canny] + # Detect edges using Canny + canny_output = cv.Canny(src_gray, threshold, threshold * 2) + ## 
[Canny] + + ## [findContours] + # Find contours + _, contours, _ = cv.findContours(canny_output, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + ## [findContours] + + # Find the rotated rectangles and ellipses for each contour + minRect = [None]*len(contours) + minEllipse = [None]*len(contours) + for i in range(len(contours)): + minRect[i] = cv.minAreaRect(contours[i]) + if contours[i].shape[0] > 5: + minEllipse[i] = cv.fitEllipse(contours[i]) + + # Draw contours + rotated rects + ellipses + ## [zeroMat] + drawing = np.zeros((canny_output.shape[0], canny_output.shape[1], 3), dtype=np.uint8) + ## [zeroMat] + ## [forContour] + for i in range(len(contours)): + color = (rng.randint(0,256), rng.randint(0,256), rng.randint(0,256)) + # contour + cv.drawContours(drawing, contours, i, color) + # ellipse + if contours[i].shape[0] > 5: + cv.ellipse(drawing, minEllipse[i], color, 2) + # rotated rectangle + box = cv.boxPoints(minRect[i]) + box = np.intp(box) #np.intp: Integer used for indexing (same as C ssize_t; normally either int32 or int64) + cv.drawContours(drawing, [box], 0, color) + ## [forContour] + + ## [showDrawings] + # Show in a window + cv.imshow('Contours', drawing) + ## [showDrawings] + +## [setup] +# Load source image +parser = argparse.ArgumentParser(description='Code for Creating Bounding rotated boxes and ellipses for contours tutorial.') +parser.add_argument('--input', help='Path to input image.', default='../data/stuff.jpg') +args = parser.parse_args() + +src = cv.imread(args.input) +if src is None: + print('Could not open or find the image:', args.input) + exit(0) + +# Convert image to gray and blur it +src_gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY) +src_gray = cv.blur(src_gray, (3,3)) +## [setup] + +## [createWindow] +# Create Window +source_window = 'Source' +cv.namedWindow(source_window) +cv.imshow(source_window, src) +## [createWindow] +## [trackbar] +max_thresh = 255 +thresh = 100 # initial threshold +cv.createTrackbar('Canny Thresh:', source_window, thresh, max_thresh, thresh_callback) +thresh_callback(thresh) +## [trackbar] + +cv.waitKey() diff --git a/samples/python/tutorial_code/ShapeDescriptors/find_contours/findContours_demo.py b/samples/python/tutorial_code/ShapeDescriptors/find_contours/findContours_demo.py new file mode 100644 index 0000000000..f4cbb5f401 --- /dev/null +++ b/samples/python/tutorial_code/ShapeDescriptors/find_contours/findContours_demo.py @@ -0,0 +1,50 @@ +from __future__ import print_function +import cv2 as cv +import numpy as np +import argparse +import random as rng + +rng.seed(12345) + +def thresh_callback(val): + threshold = val + + # Detect edges using Canny + canny_output = cv.Canny(src_gray, threshold, threshold * 2) + + # Find contours + _, contours, hierarchy = cv.findContours(canny_output, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + + # Draw contours + drawing = np.zeros((canny_output.shape[0], canny_output.shape[1], 3), dtype=np.uint8) + for i in range(len(contours)): + color = (rng.randint(0,256), rng.randint(0,256), rng.randint(0,256)) + cv.drawContours(drawing, contours, i, color, 2, cv.LINE_8, hierarchy, 0) + + # Show in a window + cv.imshow('Contours', drawing) + +# Load source image +parser = argparse.ArgumentParser(description='Code for Finding contours in your image tutorial.') +parser.add_argument('--input', help='Path to input image.', default='../data/HappyFish.jpg') +args = parser.parse_args() + +src = cv.imread(args.input) +if src is None: + print('Could not open or find the image:', args.input) + exit(0) + +# Convert image to gray and 
blur it +src_gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY) +src_gray = cv.blur(src_gray, (3,3)) + +# Create Window +source_window = 'Source' +cv.namedWindow(source_window) +cv.imshow(source_window, src) +max_thresh = 255 +thresh = 100 # initial threshold +cv.createTrackbar('Canny Thresh:', source_window, thresh, max_thresh, thresh_callback) +thresh_callback(thresh) + +cv.waitKey() diff --git a/samples/python/tutorial_code/ShapeDescriptors/hull/hull_demo.py b/samples/python/tutorial_code/ShapeDescriptors/hull/hull_demo.py new file mode 100644 index 0000000000..3254941ac0 --- /dev/null +++ b/samples/python/tutorial_code/ShapeDescriptors/hull/hull_demo.py @@ -0,0 +1,57 @@ +from __future__ import print_function +import cv2 as cv +import numpy as np +import argparse +import random as rng + +rng.seed(12345) + +def thresh_callback(val): + threshold = val + + # Detect edges using Canny + canny_output = cv.Canny(src_gray, threshold, threshold * 2) + + # Find contours + _, contours, _ = cv.findContours(canny_output, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + + # Find the convex hull object for each contour + hull_list = [] + for i in range(len(contours)): + hull = cv.convexHull(contours[i]) + hull_list.append(hull) + + # Draw contours + hull results + drawing = np.zeros((canny_output.shape[0], canny_output.shape[1], 3), dtype=np.uint8) + for i in range(len(contours)): + color = (rng.randint(0,256), rng.randint(0,256), rng.randint(0,256)) + cv.drawContours(drawing, contours, i, color) + cv.drawContours(drawing, hull_list, i, color) + + # Show in a window + cv.imshow('Contours', drawing) + +# Load source image +parser = argparse.ArgumentParser(description='Code for Convex Hull tutorial.') +parser.add_argument('--input', help='Path to input image.', default='../data/stuff.jpg') +args = parser.parse_args() + +src = cv.imread(args.input) +if src is None: + print('Could not open or find the image:', args.input) + exit(0) + +# Convert image to gray and blur it +src_gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY) +src_gray = cv.blur(src_gray, (3,3)) + +# Create Window +source_window = 'Source' +cv.namedWindow(source_window) +cv.imshow(source_window, src) +max_thresh = 255 +thresh = 100 # initial threshold +cv.createTrackbar('Canny thresh:', source_window, thresh, max_thresh, thresh_callback) +thresh_callback(thresh) + +cv.waitKey() diff --git a/samples/python/tutorial_code/ShapeDescriptors/moments/moments_demo.py b/samples/python/tutorial_code/ShapeDescriptors/moments/moments_demo.py new file mode 100644 index 0000000000..c528110ba1 --- /dev/null +++ b/samples/python/tutorial_code/ShapeDescriptors/moments/moments_demo.py @@ -0,0 +1,83 @@ +from __future__ import print_function +from __future__ import division +import cv2 as cv +import numpy as np +import argparse +import random as rng + +rng.seed(12345) + +def thresh_callback(val): + threshold = val + + ## [Canny] + # Detect edges using Canny + canny_output = cv.Canny(src_gray, threshold, threshold * 2) + ## [Canny] + + ## [findContours] + # Find contours + _, contours, _ = cv.findContours(canny_output, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + ## [findContours] + + # Get the moments + mu = [None]*len(contours) + for i in range(len(contours)): + mu[i] = cv.moments(contours[i]) + + # Get the mass centers + mc = [None]*len(contours) + for i in range(len(contours)): + # add 1e-5 to avoid division by zero + mc[i] = (mu[i]['m10'] / (mu[i]['m00'] + 1e-5), mu[i]['m01'] / (mu[i]['m00'] + 1e-5)) + + # Draw contours + ## [zeroMat] + drawing = np.zeros((canny_output.shape[0], 
canny_output.shape[1], 3), dtype=np.uint8) + ## [zeroMat] + ## [forContour] + for i in range(len(contours)): + color = (rng.randint(0,256), rng.randint(0,256), rng.randint(0,256)) + cv.drawContours(drawing, contours, i, color, 2) + cv.circle(drawing, (int(mc[i][0]), int(mc[i][1])), 4, color, -1) + ## [forContour] + + ## [showDrawings] + # Show in a window + cv.imshow('Contours', drawing) + ## [showDrawings] + + # Calculate the area with the moments 00 and compare with the result of the OpenCV function + for i in range(len(contours)): + print(' * Contour[%d] - Area (M_00) = %.2f - Area OpenCV: %.2f - Length: %.2f' % (i, mu[i]['m00'], cv.contourArea(contours[i]), cv.arcLength(contours[i], True))) + +## [setup] +# Load source image +parser = argparse.ArgumentParser(description='Code for Image Moments tutorial.') +parser.add_argument('--input', help='Path to input image.', default='../data/stuff.jpg') +args = parser.parse_args() + +src = cv.imread(args.input) +if src is None: + print('Could not open or find the image:', args.input) + exit(0) + +# Convert image to gray and blur it +src_gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY) +src_gray = cv.blur(src_gray, (3,3)) +## [setup] + +## [createWindow] +# Create Window +source_window = 'Source' +cv.namedWindow(source_window) +cv.imshow(source_window, src) +## [createWindow] +## [trackbar] +max_thresh = 255 +thresh = 100 # initial threshold +cv.createTrackbar('Canny Thresh:', source_window, thresh, max_thresh, thresh_callback) +thresh_callback(thresh) +## [trackbar] + +cv.waitKey() diff --git a/samples/python/tutorial_code/ShapeDescriptors/point_polygon_test/pointPolygonTest_demo.py b/samples/python/tutorial_code/ShapeDescriptors/point_polygon_test/pointPolygonTest_demo.py new file mode 100644 index 0000000000..150727e1d5 --- /dev/null +++ b/samples/python/tutorial_code/ShapeDescriptors/point_polygon_test/pointPolygonTest_demo.py @@ -0,0 +1,51 @@ +from __future__ import print_function +from __future__ import division +import cv2 as cv +import numpy as np + +# Create an image +r = 100 +src = np.zeros((4*r, 4*r), dtype=np.uint8) + +# Create a sequence of points to make a contour +vert = [None]*6 +vert[0] = (3*r//2, int(1.34*r)) +vert[1] = (1*r, 2*r) +vert[2] = (3*r//2, int(2.866*r)) +vert[3] = (5*r//2, int(2.866*r)) +vert[4] = (3*r, 2*r) +vert[5] = (5*r//2, int(1.34*r)) + +# Draw it in src +for i in range(6): + cv.line(src, vert[i], vert[(i+1)%6], ( 255 ), 3) + +# Get the contours +_, contours, _ = cv.findContours(src, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE) + +# Calculate the distances to the contour +raw_dist = np.empty(src.shape, dtype=np.float32) +for i in range(src.shape[0]): + for j in range(src.shape[1]): + raw_dist[i,j] = cv.pointPolygonTest(contours[0], (j,i), True) + +minVal, maxVal, _, _ = cv.minMaxLoc(raw_dist) +minVal = abs(minVal) +maxVal = abs(maxVal) + +# Depicting the distances graphically +drawing = np.zeros((src.shape[0], src.shape[1], 3), dtype=np.uint8) +for i in range(src.shape[0]): + for j in range(src.shape[1]): + if raw_dist[i,j] < 0: + drawing[i,j,0] = 255 - abs(raw_dist[i,j]) * 255 / minVal + elif raw_dist[i,j] > 0: + drawing[i,j,2] = 255 - raw_dist[i,j] * 255 / maxVal + else: + drawing[i,j,0] = 255 + drawing[i,j,1] = 255 + drawing[i,j,2] = 255 + +cv.imshow('Source', src) +cv.imshow('Distance', drawing) +cv.waitKey() From 4fe648b15c0cf90e9e47f01454261f731f5e92ea Mon Sep 17 00:00:00 2001 From: yuki takehara Date: Wed, 13 Jun 2018 03:05:44 +0900 Subject: [PATCH 33/33] Merge pull request #11706 from take1014:setTo_Nan_10507 * 
setTo_#10507

* setTo_Nan_10507

* setTo: update check / test for NaNs
---
 modules/core/src/copy.cpp      |  7 ++++++-
 modules/core/test/test_mat.cpp | 26 ++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp
index 5e7f4a879a..e67e58b98e 100644
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -463,9 +463,14 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask)
         return false;

     if (dst.depth() == CV_32F)
+    {
         for (int i = 0; i < (int)(_val.total()); i++)
-        if (_val.at<double>(i) < iwTypeGetMin(ipp32f) || _val.at<double>(i) > iwTypeGetMax(ipp32f))
+        {
+            float v = (float)(_val.at<double>(i)); // cast to float
+            if (cvIsNaN(v) || cvIsInf(v)) // accept finite numbers only
                 return false;
+        }
+    }

     if(dst.dims <= 2)
     {
diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp
index 18906f8dcd..ad480eb8d3 100644
--- a/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@@ -1612,6 +1612,32 @@ TEST(Mat, regression_7873_mat_vector_initialize)
     ASSERT_EQ(2, sub_mat.size[2]);
 }

+TEST(Mat, regression_10507_mat_setTo)
+{
+    Size sz(6, 4);
+    Mat test_mask(sz, CV_8UC1, cv::Scalar::all(255));
+    test_mask.at<uchar>(1,0) = 0;
+    test_mask.at<uchar>(0,1) = 0;
+    for (int cn = 1; cn <= 4; cn++)
+    {
+        cv::Mat A(sz, CV_MAKE_TYPE(CV_32F, cn), cv::Scalar::all(5));
+        A.setTo(cv::Scalar::all(std::numeric_limits<double>::quiet_NaN()), test_mask);
+        int nans = 0;
+        for (int y = 0; y < A.rows; y++)
+        {
+            for (int x = 0; x < A.cols; x++)
+            {
+                for (int c = 0; c < cn; c++)
+                {
+                    float v = A.ptr<float>(y, x)[c];
+                    nans += (v == v) ? 0 : 1;
+                }
+            }
+        }
+        EXPECT_EQ(nans, cn * (sz.area() - 2)) << "A=" << A << std::endl << "mask=" << test_mask << std::endl;
+    }
+}
+
 #ifdef CV_CXX_STD_ARRAY
 TEST(Core_Mat_array, outputArray_create_getMat)
 {