From 13bc55a015b93cc4fbebf0083cb01fb5c3513225 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@intel.com>
Date: Mon, 9 Dec 2019 18:24:36 +0300
Subject: [PATCH 01/11] calib3d: clarify error messages in
 collectCalibrationData()

---
 modules/calib3d/src/calibration.cpp | 42 ++++++++++++++++-------------
 1 file changed, 23 insertions(+), 19 deletions(-)
diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp
index 7a426e530d..9c640540f7 100644
--- a/modules/calib3d/src/calibration.cpp
+++ b/modules/calib3d/src/calibration.cpp
@@ -3115,28 +3115,30 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints,
                                     Mat& npoints )
 {
     int nimages = (int)objectPoints.total();
-    int i, j = 0, ni = 0, total = 0;
-    CV_Assert(nimages > 0 && nimages == (int)imagePoints1.total() &&
-        (!imgPtMat2 || nimages == (int)imagePoints2.total()));
+    int total = 0;
+    CV_Assert(nimages > 0);
+    CV_CheckEQ(nimages, (int)imagePoints1.total(), "");
+    if (imgPtMat2)
+        CV_CheckEQ(nimages, (int)imagePoints2.total(), "");
 
-    for( i = 0; i < nimages; i++ )
+    for (int i = 0; i < nimages; i++)
     {
         Mat objectPoint = objectPoints.getMat(i);
         if (objectPoint.empty())
             CV_Error(CV_StsBadSize, "objectPoints should not contain empty vector of vectors of points");
-        ni = objectPoint.checkVector(3, CV_32F);
-        if( ni <= 0 )
+        int numberOfObjectPoints = objectPoint.checkVector(3, CV_32F);
+        if (numberOfObjectPoints <= 0)
             CV_Error(CV_StsUnsupportedFormat, "objectPoints should contain vector of vectors of points of type Point3f");
 
         Mat imagePoint1 = imagePoints1.getMat(i);
         if (imagePoint1.empty())
             CV_Error(CV_StsBadSize, "imagePoints1 should not contain empty vector of vectors of points");
-        int ni1 = imagePoint1.checkVector(2, CV_32F);
-        if( ni1 <= 0 )
+        int numberOfImagePoints = imagePoint1.checkVector(2, CV_32F);
+        if (numberOfImagePoints <= 0)
             CV_Error(CV_StsUnsupportedFormat, "imagePoints1 should contain vector of vectors of points of type Point2f");
-        CV_Assert( ni == ni1 );
+        CV_CheckEQ(numberOfObjectPoints, numberOfImagePoints, "Number of object and image points must be equal");
 
-        total += ni;
+        total += numberOfObjectPoints;
     }
 
     npoints.create(1, (int)nimages, CV_32S);
@@ -3144,7 +3146,7 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints,
     imgPtMat1.create(1, (int)total, CV_32FC2);
     Point2f* imgPtData2 = 0;
 
-    if( imgPtMat2 )
+    if (imgPtMat2)
     {
         imgPtMat2->create(1, (int)total, CV_32FC2);
         imgPtData2 = imgPtMat2->ptr<Point2f>();
@@ -3153,28 +3155,30 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints,
     Point3f* objPtData = objPtMat.ptr<Point3f>();
     Point2f* imgPtData1 = imgPtMat1.ptr<Point2f>();
 
-    for( i = 0; i < nimages; i++, j += ni )
+    for (int i = 0, j = 0; i < nimages; i++)
     {
         Mat objpt = objectPoints.getMat(i);
         Mat imgpt1 = imagePoints1.getMat(i);
-        ni = objpt.checkVector(3, CV_32F);
-        npoints.at<int>(i) = ni;
-        for (int n = 0; n < ni; ++n)
+        int numberOfObjectPoints = objpt.checkVector(3, CV_32F);
+        npoints.at<int>(i) = numberOfObjectPoints;
+        for (int n = 0; n < numberOfObjectPoints; ++n)
         {
             objPtData[j + n] = objpt.ptr<Point3f>()[n];
             imgPtData1[j + n] = imgpt1.ptr<Point2f>()[n];
         }
 
-        if( imgPtData2 )
+        if (imgPtData2)
         {
             Mat imgpt2 = imagePoints2.getMat(i);
-            int ni2 = imgpt2.checkVector(2, CV_32F);
-            CV_Assert( ni == ni2 );
-            for (int n = 0; n < ni2; ++n)
+            int numberOfImage2Points = imgpt2.checkVector(2, CV_32F);
+            CV_CheckEQ(numberOfObjectPoints, numberOfImage2Points, "Number of object and image(2) points must be equal");
+            for (int n = 0; n < numberOfImage2Points; ++n)
             {
                 imgPtData2[j + n] = imgpt2.ptr<Point2f>()[n];
             }
         }
+
+        j += numberOfObjectPoints;
     }
 }
 

From af04b422c9fc2d10b30a2323175805f5e63863e8 Mon Sep 17 00:00:00 2001
From: Rajkiran Natarajan <saskatchewancatch@gmail.com>
Date: Tue, 17 Dec 2019 20:17:32 -0800
Subject: [PATCH 02/11] Change program type in hdr format files to modern
 value: RADIANCE so modern readers that expect RADIANCE will read it

---
 modules/imgcodecs/src/rgbe.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/imgcodecs/src/rgbe.cpp b/modules/imgcodecs/src/rgbe.cpp
index 7942fcd903..cea23f6053 100644
--- a/modules/imgcodecs/src/rgbe.cpp
+++ b/modules/imgcodecs/src/rgbe.cpp
@@ -145,7 +145,7 @@ rgbe2float(float *red, float *green, float *blue, unsigned char rgbe[4])
 /* default minimal header. modify if you want more information in header */
 int RGBE_WriteHeader(FILE *fp, int width, int height, rgbe_header_info *info)
 {
-  const char *programtype = "RGBE";
+  const char *programtype = "RADIANCE";
 
   if (info && (info->valid & RGBE_VALID_PROGRAMTYPE))
     programtype = info->programtype;

From 5d15c65e48eb13509dd8425dc7c4ed3181f818b8 Mon Sep 17 00:00:00 2001
From: mcellis33 <mcellis33@users.noreply.github.com>
Date: Wed, 18 Dec 2019 14:25:59 +0000
Subject: [PATCH 03/11] Merge pull request #16136 from mcellis33:mec-nan

* Handle det == 0 in findCircle3pts.

Issue 16051 shows a case where findCircle3pts returns NaN for the
center coordinates and radius due to dividing by a determinant of 0. In
this case, the points are colinear, so the longest distance between any
2 points is the diameter of the minimum enclosing circle.

* imgproc(test): update test checks for minEnclosingCircle()

* imgproc: fix handling of special cases in minEnclosingCircle()
---
 modules/imgproc/src/shapedescr.cpp     | 47 ++++++++++++++-
 modules/imgproc/test/test_convhull.cpp | 81 ++++++++++++++++++++++++++
 2 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/modules/imgproc/src/shapedescr.cpp b/modules/imgproc/src/shapedescr.cpp
index 436c74eade..8ba4b41424 100644
--- a/modules/imgproc/src/shapedescr.cpp
+++ b/modules/imgproc/src/shapedescr.cpp
@@ -60,6 +60,29 @@ static void findCircle3pts(Point2f *pts, Point2f &center, float &radius)
     Point2f midPoint2 = (pts[0] + pts[2]) / 2.0f;
     float c2 = midPoint2.x * v2.x + midPoint2.y * v2.y;
     float det = v1.x * v2.y - v1.y * v2.x;
+    if (fabs(det) <= EPS)
+    {
+        // v1 and v2 are colinear, so the longest distance between any 2 points
+        // is the diameter of the minimum enclosing circle.
+        float d1 = normL2Sqr<float>(pts[0] - pts[1]);
+        float d2 = normL2Sqr<float>(pts[0] - pts[2]);
+        float d3 = normL2Sqr<float>(pts[1] - pts[2]);
+        radius = sqrt(std::max(d1, std::max(d2, d3))) * 0.5f + EPS;
+        if (d1 >= d2 && d1 >= d3)
+        {
+            center = (pts[0] + pts[1]) * 0.5f;
+        }
+        else if (d2 >= d1 && d2 >= d3)
+        {
+            center = (pts[0] + pts[2]) * 0.5f;
+        }
+        else
+        {
+            CV_DbgAssert(d3 >= d1 && d3 >= d2);
+            center = (pts[1] + pts[2]) * 0.5f;
+        }
+        return;
+    }
     float cx = (c1 * v2.y - c2 * v1.y) / det;
     float cy = (v1.x * c2 - v2.x * c1) / det;
     center.x = (float)cx;
@@ -92,7 +115,13 @@ static void findThirdPoint(const PT *pts, int i, int j, Point2f &center, float &
             ptsf[0] = (Point2f)pts[i];
             ptsf[1] = (Point2f)pts[j];
             ptsf[2] = (Point2f)pts[k];
-            findCircle3pts(ptsf, center, radius);
+            Point2f new_center; float new_radius = 0;
+            findCircle3pts(ptsf, new_center, new_radius);
+            if (new_radius > 0)
+            {
+                radius = new_radius;
+                center = new_center;
+            }
         }
     }
 }
@@ -117,7 +146,13 @@ void findSecondPoint(const PT *pts, int i, Point2f &center, float &radius)
         }
         else
         {
-            findThirdPoint(pts, i, j, center, radius);
+            Point2f new_center; float new_radius = 0;
+            findThirdPoint(pts, i, j, new_center, new_radius);
+            if (new_radius > 0)
+            {
+                radius = new_radius;
+                center = new_center;
+            }
         }
     }
 }
@@ -143,7 +178,13 @@ static void findMinEnclosingCircle(const PT *pts, int count, Point2f &center, fl
         }
         else
         {
-            findSecondPoint(pts, i, center, radius);
+            Point2f new_center; float new_radius = 0;
+            findSecondPoint(pts, i, new_center, new_radius);
+            if (new_radius > 0)
+            {
+                radius = new_radius;
+                center = new_center;
+            }
         }
     }
 }
diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp
index 3f12140328..fc29b7fbb5 100644
--- a/modules/imgproc/test/test_convhull.cpp
+++ b/modules/imgproc/test/test_convhull.cpp
@@ -1084,6 +1084,87 @@ int CV_MinCircleTest2::validate_test_results( int test_case_idx )
     return code;
 }
 
+/****************************************************************************************\
+*                                 minEnclosingCircle Test 3                              *
+\****************************************************************************************/
+
+TEST(Imgproc_minEnclosingCircle, basic_test)
+{
+    vector<Point2f> pts;
+    pts.push_back(Point2f(0, 0));
+    pts.push_back(Point2f(10, 0));
+    pts.push_back(Point2f(5, 1));
+    const float EPS = 1.0e-3f;
+    Point2f center;
+    float radius;
+
+    // pts[2] is within the circle with diameter pts[0] - pts[1].
+    //        2
+    // 0             1
+    // NB: The triangle is obtuse, so only pts[0] and pts[1] are on the circle.
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
+
+    // pts[2] is on the circle with diameter pts[0] - pts[1].
+    //  2
+    // 0 1
+    pts[2] = Point2f(5, 5);
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
+
+    // pts[2] is outside the circle with diameter pts[0] - pts[1].
+    //   2
+    //
+    //
+    // 0   1
+    // NB: The triangle is acute, so all 3 points are on the circle.
+    pts[2] = Point2f(5, 10);
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 3.75, EPS);
+    EXPECT_NEAR(6.25f, radius, EPS);
+
+    // The 3 points are colinear.
+    pts[2] = Point2f(3, 0);
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
+
+    // 2 points are the same.
+    pts[2] = pts[1];
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 5, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(5, radius, EPS);
+
+    // 3 points are the same.
+    pts[0] = pts[1];
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 10, EPS);
+    EXPECT_NEAR(center.y, 0, EPS);
+    EXPECT_NEAR(0, radius, EPS);
+}
+
+TEST(Imgproc_minEnclosingCircle, regression_16051) {
+    vector<Point2f> pts;
+    pts.push_back(Point2f(85, 1415));
+    pts.push_back(Point2f(87, 1415));
+    pts.push_back(Point2f(89, 1414));
+    pts.push_back(Point2f(89, 1414));
+    pts.push_back(Point2f(87, 1412));
+    Point2f center;
+    float radius;
+    minEnclosingCircle(pts, center, radius);
+    EXPECT_NEAR(center.x, 86.9f, 1e-3);
+    EXPECT_NEAR(center.y, 1414.1f, 1e-3);
+    EXPECT_NEAR(2.1024551f, radius, 1e-3);
+}
+
 /****************************************************************************************\
 *                                   Perimeter Test                                     *
 \****************************************************************************************/

From aa80f754f403bb1e8e3248e201bab090f64cc401 Mon Sep 17 00:00:00 2001
From: antalzsiroscandid <antal.zsiros@candidits.hu>
Date: Wed, 27 Nov 2019 15:31:38 +0100
Subject: [PATCH 04/11] dnn: reading IR models from buffer

---
 modules/dnn/include/opencv2/dnn/dnn.hpp |  47 ++++++++-
 modules/dnn/src/dnn.cpp                 | 128 +++++++++++++++++++++---
 modules/dnn/test/test_misc.cpp          |  54 ++++++++++
 3 files changed, 211 insertions(+), 18 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index a4bbffcf41..94e2ada3f1 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -384,7 +384,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         CV_WRAP Net();  //!< Default constructor.
         CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
 
-        /** @brief Create a network from Intel's Model Optimizer intermediate representation.
+        /** @brief Create a network from Intel's Model Optimizer intermediate representation (IR).
          *  @param[in] xml XML configuration file with network's topology.
          *  @param[in] bin Binary file with trained weights.
          *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
@@ -392,6 +392,25 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          */
         CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin);
 
+        /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
+         *  @param[in] bufferModelConfig buffer with model's configuration.
+         *  @param[in] bufferWeights buffer with model's trained weights.
+         *  @returns Net object.
+         */
+        CV_WRAP static
+        Net readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights);
+
+        /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR).
+         *  @param[in] bufferModelConfigPtr buffer pointer of model's configuration.
+         *  @param[in] bufferModelConfigSize buffer size of model's configuration.
+         *  @param[in] bufferWeightsPtr buffer pointer of model's trained weights.
+         *  @param[in] bufferWeightsSize buffer size of model's trained weights.
+         *  @returns Net object.
+         */
+        static
+        Net readFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
+                                            const uchar* bufferWeightsPtr, size_t bufferWeightsSize);
+
         /** Returns true if there are no layers in the network. */
         CV_WRAP bool empty() const;
 
@@ -857,7 +876,31 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
      *  backend.
      */
-    CV_EXPORTS_W Net readNetFromModelOptimizer(const String &xml, const String &bin);
+    CV_EXPORTS_W
+    Net readNetFromModelOptimizer(const String &xml, const String &bin);
+
+    /** @brief Load a network from Intel's Model Optimizer intermediate representation.
+     *  @param[in] bufferModelConfig Buffer containing the XML configuration with the network's topology.
+     *  @param[in] bufferWeights Buffer containing the binary data with the trained weights.
+     *  @returns Net object.
+     *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+     *  backend.
+     */
+    CV_EXPORTS_W
+    Net readNetFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights);
+
+    /** @brief Load a network from Intel's Model Optimizer intermediate representation.
+     *  @param[in] bufferModelConfigPtr Pointer to buffer which contains XML configuration with network's topology.
+     *  @param[in] bufferModelConfigSize Binary size of XML configuration data.
+     *  @param[in] bufferWeightsPtr Pointer to buffer which contains binary data with trained weights.
+     *  @param[in] bufferWeightsSize Binary size of trained weights data.
+     *  @returns Net object.
+     *  Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine
+     *  backend.
+     */
+    CV_EXPORTS
+    Net readNetFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
+                                           const uchar* bufferWeightsPtr, size_t bufferWeightsSize);
 
     /** @brief Reads a network model <a href="https://onnx.ai/">ONNX</a>.
      *  @param onnxFile path to the .onnx file with text description of the network architecture.
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index ad2e52766b..e2c296bc03 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -2910,28 +2910,22 @@ struct Net::Impl
         return getBlobAsync(getPinByAlias(outputName));
     }
 #endif  // CV_CXX11
+
+#ifdef HAVE_INF_ENGINE
+    static
+    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
+#endif
 };
 
 Net::Net() : impl(new Net::Impl)
 {
 }
 
-Net Net::readFromModelOptimizer(const String& xml, const String& bin)
+#ifdef HAVE_INF_ENGINE
+/*static*/
+Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
 {
-#ifndef HAVE_INF_ENGINE
-    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
-#else
-
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
-    InferenceEngine::CNNNetReader reader;
-    reader.ReadNetwork(xml);
-    reader.ReadWeights(bin);
-
-    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
-#else
-    InferenceEngine::Core& ie = getCore();
-    InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
-#endif
+    CV_TRACE_FUNCTION();
 
     std::vector<String> inputsNames;
     for (auto& it : ieNet.getInputsInfo())
@@ -3001,9 +2995,95 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
 
     cvNet.impl->skipInfEngineInit = true;
     return cvNet;
+}
+#endif  // HAVE_INF_ENGINE
+
+Net Net::readFromModelOptimizer(const String& xml, const String& bin)
+{
+    CV_TRACE_FUNCTION();
+#ifndef HAVE_INF_ENGINE
+    CV_UNUSED(xml); CV_UNUSED(bin);
+    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
+#else
+#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
+    InferenceEngine::CNNNetReader reader;
+    reader.ReadNetwork(xml);
+    reader.ReadWeights(bin);
+
+    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
+#else
+    InferenceEngine::Core& ie = getCore();
+    InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);
+#endif
+
+    return Impl::createNetworkFromModelOptimizer(ieNet);
 #endif  // HAVE_INF_ENGINE
 }
 
+Net Net::readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights)
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(!bufferModelConfig.empty());
+    CV_Assert(!bufferWeights.empty());
+    return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(),
+                                           bufferWeights.data(), bufferWeights.size());
+}
+
+Net Net::readFromModelOptimizer(
+        const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
+        const uchar* bufferWeightsPtr, size_t bufferWeightsSize
+)
+{
+    CV_TRACE_FUNCTION();
+#ifndef HAVE_INF_ENGINE
+    CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr);
+    CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferWeightsSize);  // all four parameters are unreferenced in this branch
+    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
+#else
+    // Build the IE network from the in-memory configuration and weights; the reader API differs by IE release.
+#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
+    InferenceEngine::CNNNetReader reader;
+
+    try
+    {
+        reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize);
+
+        InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
+        InferenceEngine::TBlob<uint8_t>::Ptr weightsBlobPtr(new InferenceEngine::TBlob<uint8_t>(tensorDesc));
+        weightsBlobPtr->allocate();
+        std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize);  // copy weights into the allocated blob
+        reader.SetWeights(weightsBlobPtr);
+    }
+    catch (const std::exception& e)
+    {
+        CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
+    }
+
+    InferenceEngine::CNNNetwork ieNet = reader.getNetwork();
+#else
+    InferenceEngine::Core& ie = getCore();
+
+    std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);
+
+    InferenceEngine::CNNNetwork ieNet;
+    try
+    {
+        InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
+        InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob<uint8_t>(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize);
+
+        ieNet = ie.ReadNetwork(model, weights_blob);
+    }
+    catch (const std::exception& e)
+    {
+        CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
+    }
+#endif
+
+    return Impl::createNetworkFromModelOptimizer(ieNet);
+#endif  // HAVE_INF_ENGINE
+}
+
+
 Net::~Net()
 {
 }
@@ -4344,7 +4424,7 @@ Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
     else if (framework == "torch")
         CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
     else if (framework == "dldt")
-        CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers");
+        return readNetFromModelOptimizer(bufferConfig, bufferModel);
     CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
 }
 
@@ -4353,5 +4433,21 @@ Net readNetFromModelOptimizer(const String &xml, const String &bin)
     return Net::readFromModelOptimizer(xml, bin);
 }
 
+Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
+{
+    return Net::readFromModelOptimizer(bufferCfg, bufferModel);
+}
+
+Net readNetFromModelOptimizer(
+        const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
+        const uchar* bufferWeightsPtr, size_t bufferWeightsSize
+)
+{
+    return Net::readFromModelOptimizer(
+        bufferModelConfigPtr, bufferModelConfigSize,
+        bufferWeightsPtr, bufferWeightsSize
+    );
+}
+
 CV__DNN_EXPERIMENTAL_NS_END
 }} // namespace
diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp
index 464ef104b3..2069b97419 100644
--- a/modules/dnn/test/test_misc.cpp
+++ b/modules/dnn/test/test_misc.cpp
@@ -637,6 +637,60 @@ TEST_P(Test_Model_Optimizer, forward_two_nets)
 
     normAssert(ref0, ref2, 0, 0);
 }
+
+TEST_P(Test_Model_Optimizer, readFromBuffer)
+{
+    const Backend backendId = get<0>(GetParam());
+    const Target targetId = get<1>(GetParam());
+
+    if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        throw SkipTestException("No support for async forward");
+
+    const std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : "";
+    const std::string& weightsFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin");
+    const std::string& modelFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml");
+
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API);
+    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
+    else
+        FAIL() << "Unknown backendId";
+
+    Net net1 = readNetFromModelOptimizer(modelFile, weightsFile);
+    net1.setPreferableBackend(backendId);
+    net1.setPreferableTarget(targetId);
+
+
+    std::vector<char> modelConfig;
+    readFileContent(modelFile, modelConfig);
+    std::vector<char> weights;
+    readFileContent(weightsFile, weights);
+
+    Net net2 = readNetFromModelOptimizer(
+            (const uchar*)modelConfig.data(), modelConfig.size(),
+            (const uchar*)weights.data(), weights.size()
+    );
+    net2.setPreferableBackend(backendId);
+    net2.setPreferableTarget(targetId);
+
+    int blobSize[] = {2, 6, 75, 113};
+    Mat input(4, &blobSize[0], CV_32F);
+    randu(input, 0, 255);
+
+    Mat ref, actual;
+    {
+        net1.setInput(input);
+        ref = net1.forward();
+    }
+    {
+        net2.setInput(input);
+        actual = net2.forward();
+    }
+
+    normAssert(ref, actual, "", 0, 0);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_Model_Optimizer,
     dnnBackendsAndTargetsIE()
 );

From f5a84f75c4427e0754138264dbce0b55a80d5d38 Mon Sep 17 00:00:00 2001
From: Vitaly Tuzov <vitaly.tuzov@intel.com>
Date: Wed, 18 Dec 2019 20:05:36 +0300
Subject: [PATCH 05/11] Fix for CV_8UC2 linear resize vectorization

---
 modules/imgproc/src/resize.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp
index df127d1c43..cc967cf469 100644
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@@ -1605,13 +1605,14 @@ struct HResizeLinearVecU8_X4
 
                 for( dx = 0; dx < len0; dx += step )
                 {
+                    int ofs[4] = { xofs[dx], xofs[dx + 2], xofs[dx + 4], xofs[dx + 6] };
                     v_int16x8 al = v_load(alpha+dx*2);
                     v_int16x8 ah = v_load(alpha+dx*2+8);
                     v_uint16x8 sl, sh;
-                    v_expand(v_interleave_pairs(v_lut_quads(S0, xofs+dx)), sl, sh);
+                    v_expand(v_interleave_pairs(v_lut_quads(S0, ofs)), sl, sh);
                     v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
                     v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
-                    v_expand(v_interleave_pairs(v_lut_pairs(S1, xofs+dx)), sl, sh);
+                    v_expand(v_interleave_pairs(v_lut_quads(S1, ofs)), sl, sh);
                     v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
                     v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
                 }
@@ -1622,10 +1623,11 @@ struct HResizeLinearVecU8_X4
                 int *D = dst[k];
                 for( dx = 0; dx < len0; dx += step )
                 {
+                    int ofs[4] = { xofs[dx], xofs[dx + 2], xofs[dx + 4], xofs[dx + 6] };
                     v_int16x8 al = v_load(alpha+dx*2);
                     v_int16x8 ah = v_load(alpha+dx*2+8);
                     v_uint16x8 sl, sh;
-                    v_expand(v_interleave_pairs(v_lut_quads(S, xofs+dx)), sl, sh);
+                    v_expand(v_interleave_pairs(v_lut_quads(S, ofs)), sl, sh);
                     v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(sl), al));
                     v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah));
                 }

From 28a5f7d66bed5b562cba38bf588dc573c26c24f5 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Wed, 18 Dec 2019 23:14:38 +0000
Subject: [PATCH 06/11] 3rdparty: TBB version 2019u8 => 2020.0

---
 3rdparty/tbb/CMakeLists.txt | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt
index a3c0a812ce..67eae7dc0a 100644
--- a/3rdparty/tbb/CMakeLists.txt
+++ b/3rdparty/tbb/CMakeLists.txt
@@ -5,10 +5,11 @@ if (WIN32 AND NOT ARM)
   message(FATAL_ERROR "BUILD_TBB option supports Windows on ARM only!\nUse regular official TBB build instead of the BUILD_TBB option!")
 endif()
 
-ocv_update(OPENCV_TBB_RELEASE "2019_U8")
-ocv_update(OPENCV_TBB_RELEASE_MD5 "7c371d0f62726154d2c568a85697a0ad")
+ocv_update(OPENCV_TBB_RELEASE "v2020.0")
+ocv_update(OPENCV_TBB_RELEASE_MD5 "5858dd01ec007c139d5d178b21e06dae")
 ocv_update(OPENCV_TBB_FILENAME "${OPENCV_TBB_RELEASE}.tar.gz")
-ocv_update(OPENCV_TBB_SUBDIR "tbb-${OPENCV_TBB_RELEASE}")
+string(REGEX REPLACE "^v" "" OPENCV_TBB_RELEASE_ "${OPENCV_TBB_RELEASE}")
+ocv_update(OPENCV_TBB_SUBDIR "tbb-${OPENCV_TBB_RELEASE_}")
 
 set(tbb_src_dir "${OpenCV_BINARY_DIR}/3rdparty/tbb")
 ocv_download(FILENAME ${OPENCV_TBB_FILENAME}
@@ -34,10 +35,12 @@ ocv_include_directories("${tbb_src_dir}/include"
 file(GLOB lib_srcs "${tbb_src_dir}/src/tbb/*.cpp")
 file(GLOB lib_hdrs "${tbb_src_dir}/src/tbb/*.h")
 list(APPEND lib_srcs "${tbb_src_dir}/src/rml/client/rml_tbb.cpp")
+ocv_list_filterout(lib_srcs "${tbb_src_dir}/src/tbb/tbbbind.cpp")  # hwloc.h requirement
 
 if (WIN32)
   add_definitions(/D__TBB_DYNAMIC_LOAD_ENABLED=0
                   /D__TBB_BUILD=1
+                  /DTBB_SUPPRESS_DEPRECATED_MESSAGES=1
                   /DTBB_NO_LEGACY=1
                   /D_UNICODE
                   /DUNICODE

From 8d22ac200f488eb76d86cdf6ad12581df2980095 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Wed, 11 Dec 2019 20:08:10 +0000
Subject: [PATCH 07/11] core: workaround flipHoriz() alignment issues

---
 modules/core/include/opencv2/core/utility.hpp | 37 +++++++++
 modules/core/src/copy.cpp                     | 77 ++++++++++++++++---
 2 files changed, 102 insertions(+), 12 deletions(-)

diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp
index e7f169b01a..063747e730 100644
--- a/modules/core/include/opencv2/core/utility.hpp
+++ b/modules/core/include/opencv2/core/utility.hpp
@@ -514,6 +514,43 @@ static inline size_t roundUp(size_t a, unsigned int b)
     return a + b - 1 - (a + b - 1) % b;
 }
 
+/** @brief Alignment check of passed values
+
+Usage: `isAligned<sizeof(int)>(...)`
+
+@note Alignment(N) must be a power of 2 (2**k, 2^k)
+*/
+template<int N, typename T> static inline
+bool isAligned(const T& data)
+{
+    CV_StaticAssert((N & (N - 1)) == 0, "");  // power of 2
+    return (((size_t)data) & (N - 1)) == 0;
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1)
+{
+    return isAligned<N>((size_t)p1);
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1, const void* p2)
+{
+    return isAligned<N>(((size_t)p1)|((size_t)p2));
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1, const void* p2, const void* p3)
+{
+    return isAligned<N>(((size_t)p1)|((size_t)p2)|((size_t)p3));
+}
+/** @overload */
+template<int N> static inline
+bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4)
+{
+    return isAligned<N>(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4));
+}
+
 /** @brief Enables or disables the optimized code.
 
 The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2,
diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp
index 3f68a2555a..3fa498286a 100644
--- a/modules/core/src/copy.cpp
+++ b/modules/core/src/copy.cpp
@@ -563,6 +563,12 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask)
     return *this;
 }
 
+#if CV_NEON && !defined(__aarch64__)
+#define CV_CHECK_ALIGNMENT 1
+#else
+#define CV_CHECK_ALIGNMENT 0
+#endif
+
 #if CV_SIMD128
 template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
 {
@@ -572,6 +578,10 @@ template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s
     int width_1 = width & -v_uint8x16::nlanes;
     int i, j;
 
+#if CV_CHECK_ALIGNMENT
+    CV_Assert(isAligned<sizeof(T)>(src, dst));
+#endif
+
     for( ; size.height--; src += sstep, dst += dstep )
     {
         for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
@@ -585,7 +595,7 @@ template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s
             v_store((T*)(dst + j - v_uint8x16::nlanes), t0);
             v_store((T*)(dst + i), t1);
         }
-        if (((size_t)src|(size_t)dst) % sizeof(T) == 0)
+        if (isAligned<sizeof(T)>(src, dst))
         {
             for ( ; i < width; i += sizeof(T), j -= sizeof(T) )
             {
@@ -620,6 +630,11 @@ template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const
     int end = (int)(size.width*esz);
     int width = (end + 1)/2;
 
+#if CV_CHECK_ALIGNMENT
+    CV_Assert(isAligned<sizeof(T1)>(src, dst));
+    CV_Assert(isAligned<sizeof(T2)>(src, dst));
+#endif
+
     for( ; size.height--; src += sstep, dst += dstep )
     {
         for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) )
@@ -644,6 +659,9 @@ static void
 flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
 {
 #if CV_SIMD
+#if CV_CHECK_ALIGNMENT
+    size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep;
+#endif
     if (esz == 2 * v_uint8x16::nlanes)
     {
         int end = (int)(size.width*esz);
@@ -693,15 +711,27 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
             }
         }
     }
-    else if (esz == 8)
+    else if (esz == 8
+#if CV_CHECK_ALIGNMENT
+            && isAligned<sizeof(uint64)>(alignmentMark)
+#endif
+    )
     {
         flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz);
     }
-    else if (esz == 4)
+    else if (esz == 4
+#if CV_CHECK_ALIGNMENT
+            && isAligned<sizeof(unsigned)>(alignmentMark)
+#endif
+    )
     {
         flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz);
     }
-    else if (esz == 2)
+    else if (esz == 2
+#if CV_CHECK_ALIGNMENT
+            && isAligned<sizeof(ushort)>(alignmentMark)
+#endif
+    )
     {
         flipHoriz_single<v_uint16x8>(src, sstep, dst, dstep, size, esz);
     }
@@ -709,7 +739,11 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
     {
         flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz);
     }
-    else if (esz == 24)
+    else if (esz == 24
+#if CV_CHECK_ALIGNMENT
+            && isAligned<sizeof(uint64_t)>(alignmentMark)
+#endif
+    )
     {
         int end = (int)(size.width*esz);
         int width = (end + 1)/2;
@@ -732,6 +766,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
             }
         }
     }
+#if !CV_CHECK_ALIGNMENT
     else if (esz == 12)
     {
         flipHoriz_double<uint64_t,uint>(src, sstep, dst, dstep, size, esz);
@@ -744,8 +779,9 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
     {
         flipHoriz_double<ushort,uchar>(src, sstep, dst, dstep, size, esz);
     }
-    else
 #endif
+    else
+#endif // CV_SIMD
     {
         int i, j, limit = (int)(((size.width + 1)/2)*esz);
         AutoBuffer<int> _tab(size.width*esz);
@@ -779,16 +815,33 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size,
     {
         int i = 0;
 #if CV_SIMD
-        for( ; i <= size.width - (v_int32::nlanes * 4); i += v_int32::nlanes * 4 )
+#if CV_CHECK_ALIGNMENT
+        if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
+#endif
         {
-            v_int32 t0 = vx_load((int*)(src0 + i));
-            v_int32 t1 = vx_load((int*)(src1 + i));
-            vx_store((int*)(dst0 + i), t1);
-            vx_store((int*)(dst1 + i), t0);
+            for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
+            {
+                v_int32 t0 = vx_load((int*)(src0 + i));
+                v_int32 t1 = vx_load((int*)(src1 + i));
+                vx_store((int*)(dst0 + i), t1);
+                vx_store((int*)(dst1 + i), t0);
+            }
         }
+#if CV_CHECK_ALIGNMENT
+        else
+        {
+            for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
+            {
+                v_uint8 t0 = vx_load(src0 + i);
+                v_uint8 t1 = vx_load(src1 + i);
+                vx_store(dst0 + i, t1);
+                vx_store(dst1 + i, t0);
+            }
+        }
+#endif
 #endif
 
-        if( ((size_t)src0|(size_t)dst0|(size_t)src1|(size_t)dst1) % sizeof(int) == 0 )
+        if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
         {
             for( ; i <= size.width - 16; i += 16 )
             {

From 9cd1d087c38dfa41d4606b53a40496871ae4e875 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Thu, 19 Dec 2019 00:29:53 +0000
Subject: [PATCH 08/11] android(camera2): apply .disconnectCamera() patch from
 issue 13574

---
 .../java/org/opencv/android/JavaCamera2View.java           | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java
index 09e01b01fc..5eac339c12 100644
--- a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java
+++ b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java
@@ -230,7 +230,7 @@ public class JavaCamera2View extends CameraBridgeViewBase {
 
     @Override
     protected void disconnectCamera() {
-        Log.i(LOGTAG, "closeCamera");
+        Log.i(LOGTAG, "close camera");
         try {
             CameraDevice c = mCameraDevice;
             mCameraDevice = null;
@@ -241,13 +241,14 @@ public class JavaCamera2View extends CameraBridgeViewBase {
             if (null != c) {
                 c.close();
             }
+        } finally {
+            stopBackgroundThread();
             if (null != mImageReader) {
                 mImageReader.close();
                 mImageReader = null;
             }
-        } finally {
-            stopBackgroundThread();
         }
+        Log.i(LOGTAG, "camera closed!");
     }
 
     public static class JavaCameraSizeAccessor implements ListItemAccessor {

From e801f0e954a4d095b0240c0d83cfb5376a6f0e85 Mon Sep 17 00:00:00 2001
From: Sebastien Wybo <sebastien-wybo@users.noreply.github.com>
Date: Thu, 19 Dec 2019 10:59:18 +0100
Subject: [PATCH 09/11] Merge pull request #16011 from sebastien-wybo:fix_16007

* Fix #16007 - colinearity computed using all 3 coordinates

* calib3d(test): estimateAffine3D regression 16007
---
 modules/calib3d/src/ptsetreg.cpp                 |  6 +++---
 modules/calib3d/test/test_affine3d_estimator.cpp | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/modules/calib3d/src/ptsetreg.cpp b/modules/calib3d/src/ptsetreg.cpp
index 88e1815c35..9b2ec7857d 100644
--- a/modules/calib3d/src/ptsetreg.cpp
+++ b/modules/calib3d/src/ptsetreg.cpp
@@ -488,13 +488,13 @@ public:
             for(j = 0; j < i; ++j)
             {
                 Point3f d1 = ptr[j] - ptr[i];
-                float n1 = d1.x*d1.x + d1.y*d1.y;
+                float n1 = d1.x*d1.x + d1.y*d1.y + d1.z*d1.z;
 
                 for(k = 0; k < j; ++k)
                 {
                     Point3f d2 = ptr[k] - ptr[i];
-                    float denom = (d2.x*d2.x + d2.y*d2.y)*n1;
-                    float num = d1.x*d2.x + d1.y*d2.y;
+                    float denom = (d2.x*d2.x + d2.y*d2.y + d2.z*d2.z)*n1;
+                    float num = d1.x*d2.x + d1.y*d2.y + d1.z*d2.z;
 
                     if( num*num > threshold*threshold*denom )
                         return false;
diff --git a/modules/calib3d/test/test_affine3d_estimator.cpp b/modules/calib3d/test/test_affine3d_estimator.cpp
index 9c3821bbc4..dba09afa40 100644
--- a/modules/calib3d/test/test_affine3d_estimator.cpp
+++ b/modules/calib3d/test/test_affine3d_estimator.cpp
@@ -192,4 +192,18 @@ void CV_Affine3D_EstTest::run( int /* start_from */)
 
 TEST(Calib3d_EstimateAffine3D, accuracy) { CV_Affine3D_EstTest test; test.safe_run(); }
 
+TEST(Calib3d_EstimateAffine3D, regression_16007)
+{
+    std::vector<cv::Point3f> m1, m2;  // m1: source points, all with y == 0; m2: the same points shifted by +1 along y
+    m1.push_back(Point3f(1.0f, 0.0f, 0.0f)); m2.push_back(Point3f(1.0f, 1.0f, 0.0f));
+    m1.push_back(Point3f(1.0f, 0.0f, 1.0f)); m2.push_back(Point3f(1.0f, 1.0f, 1.0f));
+    m1.push_back(Point3f(0.5f, 0.0f, 0.5f)); m2.push_back(Point3f(0.5f, 1.0f, 0.5f));
+    m1.push_back(Point3f(2.5f, 0.0f, 2.5f)); m2.push_back(Point3f(2.5f, 1.0f, 2.5f));
+    m1.push_back(Point3f(2.0f, 0.0f, 1.0f)); m2.push_back(Point3f(2.0f, 1.0f, 1.0f));
+    // NOTE(review): the source points vary only in x and z, so a collinearity test using just x/y would see one line - presumably the case behind issue 16007
+    cv::Mat m3D, inl;
+    int res = cv::estimateAffine3D(m1, m2, m3D, inl);
+    EXPECT_EQ(1, res);  // the call is expected to return 1 for this input
+}
+
 }} // namespace

From 5bf73457431b7d2cb87ac8c107865388dbf66642 Mon Sep 17 00:00:00 2001
From: jeffeDurand <jfdurand.droid@gmail.com>
Date: Thu, 19 Dec 2019 05:02:48 -0500
Subject: [PATCH 10/11] Merge pull request #16090 from
 jeffeDurand:cuda_mog2_issue_5296

* cuda_mog2_issue_5296
---
 modules/cudabgsegm/src/cuda/mog2.cu  | 581 +++++++++++++--------------
 modules/cudabgsegm/src/cuda/mog2.hpp |  37 ++
 modules/cudabgsegm/src/mog2.cpp      | 354 ++++++++--------
 3 files changed, 493 insertions(+), 479 deletions(-)
 create mode 100644 modules/cudabgsegm/src/cuda/mog2.hpp

diff --git a/modules/cudabgsegm/src/cuda/mog2.cu b/modules/cudabgsegm/src/cuda/mog2.cu
index 789afa47a9..46891c688f 100644
--- a/modules/cudabgsegm/src/cuda/mog2.cu
+++ b/modules/cudabgsegm/src/cuda/mog2.cu
@@ -47,393 +47,372 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/limits.hpp"
 
-namespace cv { namespace cuda { namespace device
-{
-    namespace mog2
-    {
-        ///////////////////////////////////////////////////////////////
-        // Utility
-
-        __device__ __forceinline__ float cvt(uchar val)
-        {
-            return val;
-        }
-        __device__ __forceinline__ float3 cvt(const uchar3& val)
-        {
-            return make_float3(val.x, val.y, val.z);
-        }
-        __device__ __forceinline__ float4 cvt(const uchar4& val)
-        {
-            return make_float4(val.x, val.y, val.z, val.w);
-        }
-
-        __device__ __forceinline__ float sqr(float val)
-        {
-            return val * val;
-        }
-        __device__ __forceinline__ float sqr(const float3& val)
-        {
-            return val.x * val.x + val.y * val.y + val.z * val.z;
-        }
-        __device__ __forceinline__ float sqr(const float4& val)
-        {
-            return val.x * val.x + val.y * val.y + val.z * val.z;
-        }
+#include "mog2.hpp"
 
-        __device__ __forceinline__ float sum(float val)
-        {
-            return val;
-        }
-        __device__ __forceinline__ float sum(const float3& val)
-        {
-            return val.x + val.y + val.z;
-        }
-        __device__ __forceinline__ float sum(const float4& val)
-        {
-            return val.x + val.y + val.z;
-        }
-
-        template <class Ptr2D>
-        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
-        {
-            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
-            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
-            ptr((k + 1) * rows + y, x) = val;
-        }
-
-        ///////////////////////////////////////////////////////////////
-        // MOG2
+namespace cv
+{
+namespace cuda
+{
+namespace device
+{
+namespace mog2
+{
+///////////////////////////////////////////////////////////////
+// Utility
 
-        __constant__ int           c_nmixtures;
-        __constant__ float         c_Tb;
-        __constant__ float         c_TB;
-        __constant__ float         c_Tg;
-        __constant__ float         c_varInit;
-        __constant__ float         c_varMin;
-        __constant__ float         c_varMax;
-        __constant__ float         c_tau;
-        __constant__ unsigned char c_shadowVal;
+__device__ __forceinline__ float cvt(uchar val)
+{
+    return val;  // implicit uchar -> float conversion
+}
+__device__ __forceinline__ float3 cvt(const uchar3 &val)
+{
+    return make_float3(val.x, val.y, val.z);  // component-wise conversion of x, y, z to float
+}
+__device__ __forceinline__ float4 cvt(const uchar4 &val)
+{
+    return make_float4(val.x, val.y, val.z, val.w);  // component-wise conversion of x, y, z, w to float
+}
 
-        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
-        {
-            varMin = ::fminf(varMin, varMax);
-            varMax = ::fmaxf(varMin, varMax);
-
-            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
-            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
-        }
+__device__ __forceinline__ float sqr(float val)
+{
+    return val * val;  // square of a scalar
+}
+__device__ __forceinline__ float sqr(const float3 &val)
+{
+    return val.x * val.x + val.y * val.y + val.z * val.z;  // sum of squared components
+}
+__device__ __forceinline__ float sqr(const float4 &val)
+{
+    return val.x * val.x + val.y * val.y + val.z * val.z;  // NOTE(review): .w is not included - presumably the 4th channel is meant to be ignored; confirm
+}
 
-        template <bool detectShadows, typename SrcT, typename WorkT>
-        __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
-                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
-                             const float alphaT, const float alpha1, const float prune)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+__device__ __forceinline__ float sum(float val)
+{
+    return val;  // a scalar is its own sum
+}
+__device__ __forceinline__ float sum(const float3 &val)
+{
+    return val.x + val.y + val.z;  // sum of the three components
+}
+__device__ __forceinline__ float sum(const float4 &val)
+{
+    return val.x + val.y + val.z;  // NOTE(review): .w is not included - presumably the 4th channel is meant to be ignored; confirm
+}
 
-            if (x >= frame.cols || y >= frame.rows)
-                return;
+template <class Ptr2D>
+__device__ __forceinline__ void swap(Ptr2D &ptr, int x, int y, int k, int rows)
+{
+    typename Ptr2D::elem_type val = ptr(k * rows + y, x);  // save the element at row (k * rows + y), column x
+    ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);  // overwrite it with the element `rows` rows further down (index k + 1)
+    ptr((k + 1) * rows + y, x) = val;  // store the saved value there, completing the exchange of slots k and k + 1
+}
+
+///////////////////////////////////////////////////////////////
+// MOG2
+
+template <bool detectShadows, typename SrcT, typename WorkT>
+__global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
+                     PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
+                     const float alphaT, const float alpha1, const float prune, const Constants *const constants)
+{
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
-            WorkT pix = cvt(frame(y, x));
+    if (x < frame.cols && y < frame.rows)
+    {
+        WorkT pix = cvt(frame(y, x));
 
-            //calculate distances to the modes (+ sort)
-            //here we need to go in descending order!!!
+        //calculate distances to the modes (+ sort)
+        //here we need to go in descending order!!!
 
-            bool background = false; // true - the pixel classified as background
+        bool background = false; // true - the pixel classified as background
 
-            //internal:
+        //internal:
 
-            bool fitsPDF = false; //if it remains zero a new GMM mode will be added
+        bool fitsPDF = false; //if it remains zero a new GMM mode will be added
 
-            int nmodes = modesUsed(y, x);
-            int nNewModes = nmodes; //current number of modes in GMM
+        int nmodes = modesUsed(y, x);
+        const int nNewModes = nmodes; //current number of modes in GMM
 
-            float totalWeight = 0.0f;
+        float totalWeight = 0.0f;
 
-            //go through all modes
+        //go through all modes
 
-            for (int mode = 0; mode < nmodes; ++mode)
+        for (int mode = 0; mode < nmodes; ++mode)
+        {
+            //need only weight if fit is found
+            float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
+            int swap_count = 0;
+            //fit not found yet
+            if (!fitsPDF)
             {
-                //need only weight if fit is found
-                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
-                int swap_count = 0;
-                //fit not found yet
-                if (!fitsPDF)
-                {
-                    //check if it belongs to some of the remaining modes
-                    float var = gmm_variance(mode * frame.rows + y, x);
-
-                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
+                //check if it belongs to some of the remaining modes
+                const float var = gmm_variance(mode * frame.rows + y, x);
 
-                    //calculate difference and distance
-                    WorkT diff = mean - pix;
-                    float dist2 = sqr(diff);
+                const WorkT mean = gmm_mean(mode * frame.rows + y, x);
 
-                    //background? - Tb - usually larger than Tg
-                    if (totalWeight < c_TB && dist2 < c_Tb * var)
-                        background = true;
+                //calculate difference and distance
+                const WorkT diff = mean - pix;
+                const float dist2 = sqr(diff);
 
-                    //check fit
-                    if (dist2 < c_Tg * var)
-                    {
-                        //belongs to the mode
-                        fitsPDF = true;
+                //background? - Tb - usually larger than Tg
+                if (totalWeight < constants->TB_ && dist2 < constants->Tb_ * var)
+                    background = true;
 
-                        //update distribution
+                //check fit
+                if (dist2 < constants->Tg_ * var)
+                {
+                    //belongs to the mode
+                    fitsPDF = true;
 
-                        //update weight
-                        weight += alphaT;
-                        float k = alphaT / weight;
+                    //update distribution
 
-                        //update mean
-                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
+                    //update weight
+                    weight += alphaT;
+                    float k = alphaT / weight;
 
-                        //update variance
-                        float varnew = var + k * (dist2 - var);
+                    //update mean
+                    gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
 
-                        //limit the variance
-                        varnew = ::fmaxf(varnew, c_varMin);
-                        varnew = ::fminf(varnew, c_varMax);
+                    //update variance
+                    float varnew = var + k * (dist2 - var);
 
-                        gmm_variance(mode * frame.rows + y, x) = varnew;
+                    //limit the variance
+                    varnew = ::fmaxf(varnew, constants->varMin_);
+                    varnew = ::fminf(varnew, constants->varMax_);
 
-                        //sort
-                        //all other weights are at the same place and
-                        //only the matched (iModes) is higher -> just find the new place for it
+                    gmm_variance(mode * frame.rows + y, x) = varnew;
 
-                        for (int i = mode; i > 0; --i)
-                        {
-                            //check one up
-                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
-                                break;
+                    //sort
+                    //all other weights are at the same place and
+                    //only the matched (iModes) is higher -> just find the new place for it
 
-                            swap_count++;
-                            //swap one up
-                            swap(gmm_weight, x, y, i - 1, frame.rows);
-                            swap(gmm_variance, x, y, i - 1, frame.rows);
-                            swap(gmm_mean, x, y, i - 1, frame.rows);
-                        }
+                    for (int i = mode; i > 0; --i)
+                    {
+                        //check one up
+                        if (weight < gmm_weight((i - 1) * frame.rows + y, x))
+                            break;
 
-                        //belongs to the mode - bFitsPDF becomes 1
+                        swap_count++;
+                        //swap one up
+                        swap(gmm_weight, x, y, i - 1, frame.rows);
+                        swap(gmm_variance, x, y, i - 1, frame.rows);
+                        swap(gmm_mean, x, y, i - 1, frame.rows);
                     }
-                } // !fitsPDF
 
-                //check prune
-                if (weight < -prune)
-                {
-                    weight = 0.0f;
-                    nmodes--;
+                    //belongs to the mode - bFitsPDF becomes 1
                 }
+            } // !fitsPDF
 
-                gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
-                totalWeight += weight;
+            //check prune
+            if (weight < -prune)
+            {
+                weight = 0.0f;
+                nmodes--;
             }
 
-            //renormalize weights
+            gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
+            totalWeight += weight;
+        }
 
-            totalWeight = 1.f / totalWeight;
-            for (int mode = 0; mode < nmodes; ++mode)
-                gmm_weight(mode * frame.rows + y, x) *= totalWeight;
+        //renormalize weights
 
-            nmodes = nNewModes;
+        totalWeight = 1.f / totalWeight;
+        for (int mode = 0; mode < nmodes; ++mode)
+            gmm_weight(mode * frame.rows + y, x) *= totalWeight;
 
-            //make new mode if needed and exit
+        nmodes = nNewModes;
 
-            if (!fitsPDF)
-            {
-                // replace the weakest or add a new one
-                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
+        //make new mode if needed and exit
 
-                if (nmodes == 1)
-                    gmm_weight(mode * frame.rows + y, x) = 1.f;
-                else
-                {
-                    gmm_weight(mode * frame.rows + y, x) = alphaT;
+        if (!fitsPDF)
+        {
+            // replace the weakest or add a new one
+            const int mode = nmodes == constants->nmixtures_ ? constants->nmixtures_ - 1 : nmodes++;
 
-                    // renormalize all other weights
+            if (nmodes == 1)
+                gmm_weight(mode * frame.rows + y, x) = 1.f;
+            else
+            {
+                gmm_weight(mode * frame.rows + y, x) = alphaT;
 
-                    for (int i = 0; i < nmodes - 1; ++i)
-                        gmm_weight(i * frame.rows + y, x) *= alpha1;
-                }
+                // renormalize all other weights
 
-                // init
+                for (int i = 0; i < nmodes - 1; ++i)
+                    gmm_weight(i * frame.rows + y, x) *= alpha1;
+            }
 
-                gmm_mean(mode * frame.rows + y, x) = pix;
-                gmm_variance(mode * frame.rows + y, x) = c_varInit;
+            // init
 
-                //sort
-                //find the new place for it
+            gmm_mean(mode * frame.rows + y, x) = pix;
+            gmm_variance(mode * frame.rows + y, x) = constants->varInit_;
 
-                for (int i = nmodes - 1; i > 0; --i)
-                {
-                    // check one up
-                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
-                        break;
+            //sort
+            //find the new place for it
 
-                    //swap one up
-                    swap(gmm_weight, x, y, i - 1, frame.rows);
-                    swap(gmm_variance, x, y, i - 1, frame.rows);
-                    swap(gmm_mean, x, y, i - 1, frame.rows);
-                }
+            for (int i = nmodes - 1; i > 0; --i)
+            {
+                // check one up
+                if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
+                    break;
+
+                //swap one up
+                swap(gmm_weight, x, y, i - 1, frame.rows);
+                swap(gmm_variance, x, y, i - 1, frame.rows);
+                swap(gmm_mean, x, y, i - 1, frame.rows);
             }
+        }
 
-            //set the number of modes
-            modesUsed(y, x) = nmodes;
+        //set the number of modes
+        modesUsed(y, x) = nmodes;
 
-            bool isShadow = false;
-            if (detectShadows && !background)
-            {
-                float tWeight = 0.0f;
+        bool isShadow = false;
+        if (detectShadows && !background)
+        {
+            float tWeight = 0.0f;
 
-                // check all the components  marked as background:
-                for (int mode = 0; mode < nmodes; ++mode)
-                {
-                    WorkT mean = gmm_mean(mode * frame.rows + y, x);
+            // check all the components  marked as background:
+            for (int mode = 0; mode < nmodes; ++mode)
+            {
+                const WorkT mean = gmm_mean(mode * frame.rows + y, x);
 
-                    WorkT pix_mean = pix * mean;
+                const WorkT pix_mean = pix * mean;
 
-                    float numerator = sum(pix_mean);
-                    float denominator = sqr(mean);
+                const float numerator = sum(pix_mean);
+                const float denominator = sqr(mean);
 
-                    // no division by zero allowed
-                    if (denominator == 0)
-                        break;
-
-                    // if tau < a < 1 then also check the color distortion
-                    if (numerator <= denominator && numerator >= c_tau * denominator)
-                    {
-                        float a = numerator / denominator;
+                // no division by zero allowed
+                if (denominator == 0)
+                    break;
 
-                        WorkT dD = a * mean - pix;
+                // if tau < a < 1 then also check the color distortion
+                else if (numerator <= denominator && numerator >= constants->tau_ * denominator)
+                {
+                    const float a = numerator / denominator;
 
-                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
-                        {
-                            isShadow = true;
-                            break;
-                        }
-                    };
+                    WorkT dD = a * mean - pix;
 
-                    tWeight += gmm_weight(mode * frame.rows + y, x);
-                    if (tWeight > c_TB)
+                    if (sqr(dD) < constants->Tb_ * gmm_variance(mode * frame.rows + y, x) * a * a)
+                    {
+                        isShadow = true;
                         break;
-                }
-            }
+                    }
+                };
 
-            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
+                tWeight += gmm_weight(mode * frame.rows + y, x);
+                if (tWeight > constants->TB_)
+                    break;
+            }
         }
 
-        template <typename SrcT, typename WorkT>
-        void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
-                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-
-            const float alpha1 = 1.0f - alphaT;
+        fgmask(y, x) = background ? 0 : isShadow ? constants->shadowVal_ : 255;
+    }
+}
 
-            if (detectShadows)
-            {
-                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
+template <typename SrcT, typename WorkT>
+void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+                 float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream)
+{
+    dim3 block(32, 8);
+    dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
 
-                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
-                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
-                                                                    alphaT, alpha1, prune);
-            }
-            else
-            {
-                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
+    const float alpha1 = 1.0f - alphaT;
 
-                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
-                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
-                                                                    alphaT, alpha1, prune);
-            }
+    if (detectShadows)
+    {
+        cudaSafeCall(cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1));
 
-            cudaSafeCall( cudaGetLastError() );
+        mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>)frame, fgmask, modesUsed,
+                                                            weight, variance, (PtrStepSz<WorkT>)mean,
+                                                            alphaT, alpha1, prune, constants);
+    }
+    else
+    {
+        cudaSafeCall(cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1));
 
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
+        mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>)frame, fgmask, modesUsed,
+                                                             weight, variance, (PtrStepSz<WorkT>)mean,
+                                                             alphaT, alpha1, prune, constants);
+    }
 
-        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
-                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
-        {
-            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
+    cudaSafeCall(cudaGetLastError());
 
-            static const func_t funcs[] =
-            {
-                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
-            };
+    if (stream == 0)
+        cudaSafeCall(cudaDeviceSynchronize());
+}
 
-            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
-        }
+void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
+              float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream)
+{
+    typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream);
 
-        template <typename WorkT, typename OutT>
-        __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
+    static const func_t funcs[] =
         {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+            0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>};
 
-            if (x >= modesUsed.cols || y >= modesUsed.rows)
-                return;
+    funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, constants, stream);
+}
 
-            int nmodes = modesUsed(y, x);
+template <typename WorkT, typename OutT>
+__global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst, const Constants *const constants)
+{
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
-            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
-            float totalWeight = 0.0f;
+    if (x >= modesUsed.cols || y >= modesUsed.rows)
+        return;
 
-            for (int mode = 0; mode < nmodes; ++mode)
-            {
-                float weight = gmm_weight(mode * modesUsed.rows + y, x);
+    int nmodes = modesUsed(y, x);
 
-                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
-                meanVal = meanVal + weight * mean;
+    WorkT meanVal = VecTraits<WorkT>::all(0.0f);
+    float totalWeight = 0.0f;
 
-                totalWeight += weight;
+    for (int mode = 0; mode < nmodes; ++mode)
+    {
+        float weight = gmm_weight(mode * modesUsed.rows + y, x);
 
-                if(totalWeight > c_TB)
-                    break;
-            }
+        WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
+        meanVal = meanVal + weight * mean;
 
-            meanVal = meanVal * (1.f / totalWeight);
+        totalWeight += weight;
 
-            dst(y, x) = saturate_cast<OutT>(meanVal);
-        }
+        if (totalWeight > constants->TB_)
+            break;
+    }
 
-        template <typename WorkT, typename OutT>
-        void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
-        {
-            dim3 block(32, 8);
-            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
+    meanVal = meanVal * (1.f / totalWeight);
 
-            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
+    dst(y, x) = saturate_cast<OutT>(meanVal);
+}
 
-            getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
-            cudaSafeCall( cudaGetLastError() );
+template <typename WorkT, typename OutT>
+void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream)
+{
+    dim3 block(32, 8);
+    dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
 
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
+    cudaSafeCall(cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1));
 
-        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
-        {
-            typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
+    getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>)mean, (PtrStepSz<OutT>)dst, constants);
+    cudaSafeCall(cudaGetLastError());
 
-            static const func_t funcs[] =
-            {
-                0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
-            };
+    if (stream == 0)
+        cudaSafeCall(cudaDeviceSynchronize());
+}
 
-            funcs[cn](modesUsed, weight, mean, dst, stream);
-        }
-    }
-}}}
+void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream)
+{ // Host entry point: forwards to the getBackgroundImage2_caller instantiation selected by the channel count cn.
+    typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream);
 
+    static const func_t funcs[] =
+        {
+            0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>};
+    // The table is indexed by cn: only 1, 3 and 4 have an entry; slots 0 and 2 are null and cn is not range-checked here.
+    funcs[cn](modesUsed, weight, mean, dst, constants, stream);
+}
+} // namespace mog2
+} // namespace device
+} // namespace cuda
+} // namespace cv
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/cudabgsegm/src/cuda/mog2.hpp b/modules/cudabgsegm/src/cuda/mog2.hpp
new file mode 100644
index 0000000000..5b2155195f
--- /dev/null
+++ b/modules/cudabgsegm/src/cuda/mog2.hpp
@@ -0,0 +1,37 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CUDA_MOG2_H
+#define OPENCV_CUDA_MOG2_H
+
+#include "opencv2/core/cuda.hpp"
+
+struct CUstream_st;
+typedef struct CUstream_st *cudaStream_t;
+
+namespace cv { namespace cuda {
+
+class Stream;
+
+namespace device { namespace mog2 {
+
+typedef struct
+{ // Parameter block passed to the MOG2 CUDA entry points as `const Constants *`; the kernels dereference it on the device.
+    float Tb_; // variance threshold; written by MOG2Impl::setVarThreshold()
+    float TB_; // background ratio: getBackgroundImage2 stops summing modes once their total weight exceeds it
+    float Tg_; // written by MOG2Impl::setVarThresholdGen()
+    float varInit_; // initial variance for new components
+    float varMin_; // MOG2Impl's setters clamp so that varMin_ <= varMax_
+    float varMax_;
+    float tau_; // shadow threshold
+    int nmixtures_; // maximal number of Gaussians in mixture
+    unsigned char shadowVal_; // value to use in the segmentation mask for shadows
+} Constants;
+
+void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream);
+void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream);
+
+} } } }
+
+#endif /* OPENCV_CUDA_MOG2_H */
diff --git a/modules/cudabgsegm/src/mog2.cpp b/modules/cudabgsegm/src/mog2.cpp
index e727dcfdaf..47135a088b 100644
--- a/modules/cudabgsegm/src/mog2.cpp
+++ b/modules/cudabgsegm/src/mog2.cpp
@@ -41,209 +41,207 @@
 //M*/
 
 #include "precomp.hpp"
+#include "cuda/mog2.hpp"
 
 using namespace cv;
 using namespace cv::cuda;
+using namespace cv::cuda::device::mog2;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr<cuda::BackgroundSubtractorMOG2>(); }
+Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int, double, bool)
+{ // Stub compiled when HAVE_CUDA is not defined or CUDA_DISABLER is defined; all arguments are ignored.
+    throw_no_cuda(); // presumably raises the "built without CUDA" error — defined outside this file, confirm
+    return Ptr<cuda::BackgroundSubtractorMOG2>(); // empty Ptr, reached only if throw_no_cuda() returns
+}
 
 #else
 
-namespace cv { namespace cuda { namespace device
+namespace
 {
-    namespace mog2
-    {
-        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
-        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
-        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
-    }
-}}}
+// default parameters of gaussian background detection algorithm
+const int defaultHistory = 500; // used when history <= 0; the automatic learning rate is 1 / min(2 * nframes, history)
+const float defaultVarThreshold = 4.0f * 4.0f; // seeds Constants::Tb_ when varThreshold <= 0
+const int defaultNMixtures = 5;            // maximal number of Gaussians in mixture
+const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
+const float defaultVarThresholdGen = 3.0f * 3.0f; // seeds Constants::Tg_
+const float defaultVarInit = 15.0f; // initial variance for new components
+const float defaultVarMax = 5.0f * defaultVarInit; // seeds Constants::varMax_
+const float defaultVarMin = 4.0f; // seeds Constants::varMin_
+
+// additional parameters
+const float defaultCT = 0.05f;                // complexity reduction prior constant 0 - no reduction of number of components
+const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
+const float defaultShadowThreshold = 0.5f;    // Tau - shadow threshold, see the paper for explanation
+
+class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2
+{ // Host-side state of the CUDA MOG2 background subtractor: tunable parameters plus the per-pixel model buffers.
+public:
+    MOG2Impl(int history, double varThreshold, bool detectShadows);
+    ~MOG2Impl();
 
-namespace
+    void apply(InputArray image, OutputArray fgmask, double learningRate = -1) CV_OVERRIDE;
+    void apply(InputArray image, OutputArray fgmask, double learningRate, Stream &stream) CV_OVERRIDE;
+
+    void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
+    void getBackgroundImage(OutputArray backgroundImage, Stream &stream) const CV_OVERRIDE;
+
+    int getHistory() const CV_OVERRIDE { return history_; }
+    void setHistory(int history) CV_OVERRIDE { history_ = history; }
+    // Accessors that touch constantsHost_ change the host copy only; see the note on constantsDevice_.
+    int getNMixtures() const CV_OVERRIDE { return constantsHost_.nmixtures_; }
+    void setNMixtures(int nmixtures) CV_OVERRIDE { constantsHost_.nmixtures_ = nmixtures; }
+
+    double getBackgroundRatio() const CV_OVERRIDE { return constantsHost_.TB_; }
+    void setBackgroundRatio(double ratio) CV_OVERRIDE { constantsHost_.TB_ = (float)ratio; }
+
+    double getVarThreshold() const CV_OVERRIDE { return constantsHost_.Tb_; }
+    void setVarThreshold(double varThreshold) CV_OVERRIDE { constantsHost_.Tb_ = (float)varThreshold; }
+
+    double getVarThresholdGen() const CV_OVERRIDE { return constantsHost_.Tg_; }
+    void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { constantsHost_.Tg_ = (float)varThresholdGen; }
+
+    double getVarInit() const CV_OVERRIDE { return constantsHost_.varInit_; }
+    void setVarInit(double varInit) CV_OVERRIDE { constantsHost_.varInit_ = (float)varInit; }
+    // setVarMin()/setVarMax() clamp against the other bound so that varMin_ <= varMax_; each reads the other field.
+    double getVarMin() const CV_OVERRIDE { return constantsHost_.varMin_; }
+    void setVarMin(double varMin) CV_OVERRIDE { constantsHost_.varMin_ = ::fminf((float)varMin, constantsHost_.varMax_); }
+
+    double getVarMax() const CV_OVERRIDE { return constantsHost_.varMax_; }
+    void setVarMax(double varMax) CV_OVERRIDE { constantsHost_.varMax_ = ::fmaxf(constantsHost_.varMin_, (float)varMax); }
+
+    double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; }
+    void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float)ct; }
+
+    bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; }
+    void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; }
+
+    int getShadowValue() const CV_OVERRIDE { return constantsHost_.shadowVal_; }
+    void setShadowValue(int value) CV_OVERRIDE { constantsHost_.shadowVal_ = (uchar)value; }
+
+    double getShadowThreshold() const CV_OVERRIDE { return constantsHost_.tau_; }
+    void setShadowThreshold(double threshold) CV_OVERRIDE { constantsHost_.tau_ = (float)threshold; }
+
+private:
+    void initialize(Size frameSize, int frameType, Stream &stream);
+    // NOTE(review): in this file the device copy is refreshed only by initialize(), so later setter calls wait for a re-init.
+    Constants constantsHost_; // host copy edited by the setters above
+    Constants *constantsDevice_; // cudaMalloc'ed in the ctor, cudaFree'd in the dtor; NOTE(review): no copy operations are declared
+
+    int history_;
+    float ct_; // complexity reduction threshold; apply() passes -learningRate * ct_ to mog2_gpu
+    bool detectShadows_;
+
+    Size frameSize_;
+    int frameType_;
+    int nframes_; // frames processed since the last initialize()
+    // Model buffers created by initialize() with (height * nmixtures) rows and width columns.
+    GpuMat weight_;
+    GpuMat variance_;
+    GpuMat mean_;
+
+    //keep track of number of modes per pixel
+    GpuMat bgmodelUsedModes_;
+};
+
+MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : frameSize_(0, 0), frameType_(0), nframes_(0)
+{ // Fills constantsHost_ with the defaults and allocates the device-side Constants buffer.
+    history_ = history > 0 ? history : defaultHistory;
+    detectShadows_ = detectShadows;
+    ct_ = defaultCT;
+    // Non-positive history / varThreshold arguments select the compiled-in defaults.
+    setNMixtures(defaultNMixtures);
+    setBackgroundRatio(defaultBackgroundRatio);
+    setVarInit(defaultVarInit);
+    constantsHost_.varMin_ = defaultVarMin; // assigned directly: setVarMin()/setVarMax() clamp against each other
+    constantsHost_.varMax_ = defaultVarMax; // and would read the other, still uninitialized, field at this point
+    setVarThreshold(varThreshold > 0 ? (float)varThreshold : defaultVarThreshold);
+    setVarThresholdGen(defaultVarThresholdGen);
+
+    setShadowValue(defaultShadowValue);
+    setShadowThreshold(defaultShadowThreshold);
+    // The buffer is only allocated here; initialize() uploads constantsHost_ into it.
+    cudaSafeCall(cudaMalloc((void **)&constantsDevice_, sizeof(Constants)));
+}
+
+MOG2Impl::~MOG2Impl()
+{ // Releases the device-side Constants buffer obtained with cudaMalloc in the constructor.
+    cudaFree(constantsDevice_); // the return code is not checked here
+}
+
+void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate)
+{
+    apply(image, fgmask, learningRate, Stream::Null());
+}
+
+void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream &stream)
 {
-    // default parameters of gaussian background detection algorithm
-    const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2
-    const float defaultVarThreshold = 4.0f * 4.0f;
-    const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
-    const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
-    const float defaultVarThresholdGen = 3.0f * 3.0f;
-    const float defaultVarInit = 15.0f; // initial variance for new components
-    const float defaultVarMax = 5.0f * defaultVarInit;
-    const float defaultVarMin = 4.0f;
-
-    // additional parameters
-    const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components
-    const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
-    const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation
-
-    class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2
-    {
-    public:
-        MOG2Impl(int history, double varThreshold, bool detectShadows);
-
-        void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE;
-        void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) CV_OVERRIDE;
-
-        void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE;
-        void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const CV_OVERRIDE;
-
-        int getHistory() const CV_OVERRIDE { return history_; }
-        void setHistory(int history) CV_OVERRIDE { history_ = history; }
-
-        int getNMixtures() const CV_OVERRIDE { return nmixtures_; }
-        void setNMixtures(int nmixtures) CV_OVERRIDE { nmixtures_ = nmixtures; }
-
-        double getBackgroundRatio() const CV_OVERRIDE { return backgroundRatio_; }
-        void setBackgroundRatio(double ratio) CV_OVERRIDE { backgroundRatio_ = (float) ratio; }
-
-        double getVarThreshold() const CV_OVERRIDE { return varThreshold_; }
-        void setVarThreshold(double varThreshold) CV_OVERRIDE { varThreshold_ = (float) varThreshold; }
-
-        double getVarThresholdGen() const CV_OVERRIDE { return varThresholdGen_; }
-        void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { varThresholdGen_ = (float) varThresholdGen; }
-
-        double getVarInit() const CV_OVERRIDE { return varInit_; }
-        void setVarInit(double varInit) CV_OVERRIDE { varInit_ = (float) varInit; }
-
-        double getVarMin() const CV_OVERRIDE { return varMin_; }
-        void setVarMin(double varMin) CV_OVERRIDE { varMin_ = (float) varMin; }
-
-        double getVarMax() const CV_OVERRIDE { return varMax_; }
-        void setVarMax(double varMax) CV_OVERRIDE { varMax_ = (float) varMax; }
-
-        double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; }
-        void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float) ct; }
-
-        bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; }
-        void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; }
-
-        int getShadowValue() const CV_OVERRIDE { return shadowValue_; }
-        void setShadowValue(int value) CV_OVERRIDE { shadowValue_ = (uchar) value; }
+    using namespace cv::cuda::device::mog2;
+
+    GpuMat frame = _frame.getGpuMat();
 
-        double getShadowThreshold() const CV_OVERRIDE { return shadowThreshold_; }
-        void setShadowThreshold(double threshold) CV_OVERRIDE { shadowThreshold_ = (float) threshold; }
-
-    private:
-        void initialize(Size frameSize, int frameType);
-
-        int history_;
-        int nmixtures_;
-        float backgroundRatio_;
-        float varThreshold_;
-        float varThresholdGen_;
-        float varInit_;
-        float varMin_;
-        float varMax_;
-        float ct_;
-        bool detectShadows_;
-        uchar shadowValue_;
-        float shadowThreshold_;
-
-        Size frameSize_;
-        int frameType_;
-        int nframes_;
-
-        GpuMat weight_;
-        GpuMat variance_;
-        GpuMat mean_;
-
-        //keep track of number of modes per pixel
-        GpuMat bgmodelUsedModes_;
-    };
-
-    MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) :
-        frameSize_(0, 0), frameType_(0), nframes_(0)
-    {
-        history_ = history > 0 ? history : defaultHistory;
-        varThreshold_ = varThreshold > 0 ? (float) varThreshold : defaultVarThreshold;
-        detectShadows_ = detectShadows;
-
-        nmixtures_ = defaultNMixtures;
-        backgroundRatio_ = defaultBackgroundRatio;
-        varInit_ = defaultVarInit;
-        varMax_ = defaultVarMax;
-        varMin_ = defaultVarMin;
-        varThresholdGen_ = defaultVarThresholdGen;
-        ct_ = defaultCT;
-        shadowValue_ =  defaultShadowValue;
-        shadowThreshold_ = defaultShadowThreshold;
-    }
+    int ch = frame.channels();
+    int work_ch = ch;
 
-    void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate)
-    {
-        apply(image, fgmask, learningRate, Stream::Null());
-    }
+    if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
+        initialize(frame.size(), frame.type(), stream);
 
-    void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
-    {
-        using namespace cv::cuda::device::mog2;
+    _fgmask.create(frameSize_, CV_8UC1);
+    GpuMat fgmask = _fgmask.getGpuMat();
 
-        GpuMat frame = _frame.getGpuMat();
+    fgmask.setTo(Scalar::all(0), stream);
 
-        int ch = frame.channels();
-        int work_ch = ch;
+    ++nframes_;
+    learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_);
+    CV_Assert(learningRate >= 0);
 
-        if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels())
-            initialize(frame.size(), frame.type());
+    mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_,
+             (float)learningRate, static_cast<float>(-learningRate * ct_), detectShadows_, constantsDevice_, StreamAccessor::getStream(stream));
+}
+
+void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
+{ // Convenience overload: same as the Stream overload, run on Stream::Null().
+    getBackgroundImage(backgroundImage, Stream::Null());
+}
+
+void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream &stream) const
+{ // Launches getBackgroundImage2_gpu on the given stream to fill _backgroundImage from the stored mixture model.
+    using namespace cv::cuda::device::mog2;
+    // The output is (re)created with the size and type recorded by the last initialize() call.
+    _backgroundImage.create(frameSize_, frameType_);
+    GpuMat backgroundImage = _backgroundImage.getGpuMat();
+    // NOTE(review): constantsDevice_ is only written in initialize(); confirm this is never called before the first apply().
+    getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, constantsDevice_, StreamAccessor::getStream(stream));
+}
+
+void MOG2Impl::initialize(cv::Size frameSize, int frameType, Stream &stream)
+{
+    using namespace cv::cuda::device::mog2;
 
-        _fgmask.create(frameSize_, CV_8UC1);
-        GpuMat fgmask = _fgmask.getGpuMat();
+    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
 
-        fgmask.setTo(Scalar::all(0), stream);
+    frameSize_ = frameSize;
+    frameType_ = frameType;
+    nframes_ = 0;
 
-        ++nframes_;
-        learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_);
-        CV_Assert( learningRate >= 0 );
+    const int ch = CV_MAT_CN(frameType);
+    const int work_ch = ch;
 
-        mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_,
-                 (float) learningRate, static_cast<float>(-learningRate * ct_), detectShadows_, StreamAccessor::getStream(stream));
-    }
+    // for each gaussian mixture of each pixel bg model we store ...
+    // the mixture weight (w),
+    // the mean (nchannels values) and
+    // the covariance
+    weight_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1);
+    variance_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1);
+    mean_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC(work_ch));
 
-    void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
-    {
-        getBackgroundImage(backgroundImage, Stream::Null());
-    }
-
-    void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
-    {
-        using namespace cv::cuda::device::mog2;
-
-        _backgroundImage.create(frameSize_, frameType_);
-        GpuMat backgroundImage = _backgroundImage.getGpuMat();
-
-        getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream));
-    }
-
-    void MOG2Impl::initialize(cv::Size frameSize, int frameType)
-    {
-        using namespace cv::cuda::device::mog2;
-
-        CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 );
-
-        frameSize_ = frameSize;
-        frameType_ = frameType;
-        nframes_ = 0;
-
-        int ch = CV_MAT_CN(frameType);
-        int work_ch = ch;
-
-        // for each gaussian mixture of each pixel bg model we store ...
-        // the mixture weight (w),
-        // the mean (nchannels values) and
-        // the covariance
-        weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-        variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
-        mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
-
-        //make the array for keeping track of the used modes per pixel - all zeros at start
-        bgmodelUsedModes_.create(frameSize_, CV_8UC1);
-        bgmodelUsedModes_.setTo(Scalar::all(0));
+    //make the array for keeping track of the used modes per pixel - all zeros at start
+    bgmodelUsedModes_.create(frameSize_, CV_8UC1);
+    bgmodelUsedModes_.setTo(Scalar::all(0));
 
-        loadConstants(nmixtures_, varThreshold_, backgroundRatio_, varThresholdGen_, varInit_, varMin_, varMax_, shadowThreshold_, shadowValue_);
-    }
+    cudaSafeCall(cudaMemcpyAsync(constantsDevice_, &constantsHost_, sizeof(Constants), cudaMemcpyHostToDevice, StreamAccessor::getStream(stream)));
 }
+} // namespace
 
 Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows)
 {

From 4733a19babec760ba237b8c277bb1de664a641c1 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Thu, 19 Dec 2019 13:20:42 +0300
Subject: [PATCH 11/11] Merge pull request #16194 from alalek:fix_16192

* imgproc(test): resize(LANCZOS4) reproducer 16192

* imgproc: fix resize LANCZOS4 coefficients generation
---
 modules/imgproc/src/resize.cpp        | 23 +++++++++++++----------
 modules/imgproc/test/test_imgwarp.cpp | 13 +++++++++++++
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp
index cc967cf469..02f78819de 100644
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@@ -920,20 +920,23 @@ static inline void interpolateLanczos4( float x, float* coeffs )
     static const double cs[][2]=
     {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
 
-    if( x < FLT_EPSILON )
-    {
-        for( int i = 0; i < 8; i++ )
-            coeffs[i] = 0;
-        coeffs[3] = 1;
-        return;
-    }
-
     float sum = 0;
     double y0=-(x+3)*CV_PI*0.25, s0 = std::sin(y0), c0= std::cos(y0);
     for(int i = 0; i < 8; i++ )
     {
-        double y = -(x+3-i)*CV_PI*0.25;
-        coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
+        float y0_ = (x+3-i);
+        if (fabs(y0_) >= 1e-6f)
+        {
+            double y = -y0_*CV_PI*0.25;
+            coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
+        }
+        else
+        {
+            // special handling for 'x' values:
+            // - ~0.0: 0 0 0 1 0 0 0 0
+            // - ~1.0: 0 0 0 0 1 0 0 0
+            coeffs[i] = 1e30f;
+        }
         sum += coeffs[i];
     }
 
diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp
index 400426af15..232f374548 100644
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@@ -1708,6 +1708,19 @@ TEST(Resize, Area_half)
     }
 }
 
+TEST(Resize, lanczos4_regression_16192)
+{ // Reproducer for issue 16192: LANCZOS4 resize of a constant image must return the same constant.
+    Size src_size(11, 17);
+    Size dst_size(11, 153); // same width, height scaled by 9 (17 -> 153)
+    Mat src(src_size, CV_8UC3, Scalar::all(128));
+    Mat dst(dst_size, CV_8UC3, Scalar::all(255)); // pre-filled with a value that differs from the source
+
+    cv::resize(src, dst, dst_size, 0, 0, INTER_LANCZOS4);
+    // Any pixel that differs from 128 fails the test; the top-left 8x8 block of dst is printed on failure.
+    Mat expected(dst_size, CV_8UC3, Scalar::all(128));
+    EXPECT_EQ(cvtest::norm(dst, expected, NORM_INF), 0) << dst(Rect(0,0,8,8));
+}
+
 TEST(Imgproc_Warp, multichannel)
 {
     static const int inter_types[] = {INTER_NEAREST, INTER_AREA, INTER_CUBIC,