From 13bc55a015b93cc4fbebf0083cb01fb5c3513225 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 9 Dec 2019 18:24:36 +0300 Subject: [PATCH 01/11] calib3d: clarify error messages in collectCalibrationData() --- modules/calib3d/src/calibration.cpp | 42 ++++++++++++++++------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp index 7a426e530d..9c640540f7 100644 --- a/modules/calib3d/src/calibration.cpp +++ b/modules/calib3d/src/calibration.cpp @@ -3115,28 +3115,30 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints, Mat& npoints ) { int nimages = (int)objectPoints.total(); - int i, j = 0, ni = 0, total = 0; - CV_Assert(nimages > 0 && nimages == (int)imagePoints1.total() && - (!imgPtMat2 || nimages == (int)imagePoints2.total())); + int total = 0; + CV_Assert(nimages > 0); + CV_CheckEQ(nimages, (int)imagePoints1.total(), ""); + if (imgPtMat2) + CV_CheckEQ(nimages, (int)imagePoints2.total(), ""); - for( i = 0; i < nimages; i++ ) + for (int i = 0; i < nimages; i++) { Mat objectPoint = objectPoints.getMat(i); if (objectPoint.empty()) CV_Error(CV_StsBadSize, "objectPoints should not contain empty vector of vectors of points"); - ni = objectPoint.checkVector(3, CV_32F); - if( ni <= 0 ) + int numberOfObjectPoints = objectPoint.checkVector(3, CV_32F); + if (numberOfObjectPoints <= 0) CV_Error(CV_StsUnsupportedFormat, "objectPoints should contain vector of vectors of points of type Point3f"); Mat imagePoint1 = imagePoints1.getMat(i); if (imagePoint1.empty()) CV_Error(CV_StsBadSize, "imagePoints1 should not contain empty vector of vectors of points"); - int ni1 = imagePoint1.checkVector(2, CV_32F); - if( ni1 <= 0 ) + int numberOfImagePoints = imagePoint1.checkVector(2, CV_32F); + if (numberOfImagePoints <= 0) CV_Error(CV_StsUnsupportedFormat, "imagePoints1 should contain vector of vectors of points of type Point2f"); - CV_Assert( ni == ni1 ); + 
CV_CheckEQ(numberOfObjectPoints, numberOfImagePoints, "Number of object and image points must be equal"); - total += ni; + total += numberOfObjectPoints; } npoints.create(1, (int)nimages, CV_32S); @@ -3144,7 +3146,7 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints, imgPtMat1.create(1, (int)total, CV_32FC2); Point2f* imgPtData2 = 0; - if( imgPtMat2 ) + if (imgPtMat2) { imgPtMat2->create(1, (int)total, CV_32FC2); imgPtData2 = imgPtMat2->ptr(); @@ -3153,28 +3155,30 @@ static void collectCalibrationData( InputArrayOfArrays objectPoints, Point3f* objPtData = objPtMat.ptr(); Point2f* imgPtData1 = imgPtMat1.ptr(); - for( i = 0; i < nimages; i++, j += ni ) + for (int i = 0, j = 0; i < nimages; i++) { Mat objpt = objectPoints.getMat(i); Mat imgpt1 = imagePoints1.getMat(i); - ni = objpt.checkVector(3, CV_32F); - npoints.at(i) = ni; - for (int n = 0; n < ni; ++n) + int numberOfObjectPoints = objpt.checkVector(3, CV_32F); + npoints.at(i) = numberOfObjectPoints; + for (int n = 0; n < numberOfObjectPoints; ++n) { objPtData[j + n] = objpt.ptr()[n]; imgPtData1[j + n] = imgpt1.ptr()[n]; } - if( imgPtData2 ) + if (imgPtData2) { Mat imgpt2 = imagePoints2.getMat(i); - int ni2 = imgpt2.checkVector(2, CV_32F); - CV_Assert( ni == ni2 ); - for (int n = 0; n < ni2; ++n) + int numberOfImage2Points = imgpt2.checkVector(2, CV_32F); + CV_CheckEQ(numberOfObjectPoints, numberOfImage2Points, "Number of object and image(2) points must be equal"); + for (int n = 0; n < numberOfImage2Points; ++n) { imgPtData2[j + n] = imgpt2.ptr()[n]; } } + + j += numberOfObjectPoints; } } From af04b422c9fc2d10b30a2323175805f5e63863e8 Mon Sep 17 00:00:00 2001 From: Rajkiran Natarajan Date: Tue, 17 Dec 2019 20:17:32 -0800 Subject: [PATCH 02/11] Change program type in hdr format files to modern value: RADIANCE so modern readers that expect RADIANCE will read it --- modules/imgcodecs/src/rgbe.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgcodecs/src/rgbe.cpp 
b/modules/imgcodecs/src/rgbe.cpp index 7942fcd903..cea23f6053 100644 --- a/modules/imgcodecs/src/rgbe.cpp +++ b/modules/imgcodecs/src/rgbe.cpp @@ -145,7 +145,7 @@ rgbe2float(float *red, float *green, float *blue, unsigned char rgbe[4]) /* default minimal header. modify if you want more information in header */ int RGBE_WriteHeader(FILE *fp, int width, int height, rgbe_header_info *info) { - const char *programtype = "RGBE"; + const char *programtype = "RADIANCE"; if (info && (info->valid & RGBE_VALID_PROGRAMTYPE)) programtype = info->programtype; From 5d15c65e48eb13509dd8425dc7c4ed3181f818b8 Mon Sep 17 00:00:00 2001 From: mcellis33 Date: Wed, 18 Dec 2019 14:25:59 +0000 Subject: [PATCH 03/11] Merge pull request #16136 from mcellis33:mec-nan * Handle det == 0 in findCircle3pts. Issue 16051 shows a case where findCircle3pts returns NaN for the center coordinates and radius due to dividing by a determinant of 0. In this case, the points are colinear, so the longest distance between any 2 points is the diameter of the minimum enclosing circle. * imgproc(test): update test checks for minEnclosingCircle() * imgproc: fix handling of special cases in minEnclosingCircle() --- modules/imgproc/src/shapedescr.cpp | 47 ++++++++++++++- modules/imgproc/test/test_convhull.cpp | 81 ++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 3 deletions(-) diff --git a/modules/imgproc/src/shapedescr.cpp b/modules/imgproc/src/shapedescr.cpp index 436c74eade..8ba4b41424 100644 --- a/modules/imgproc/src/shapedescr.cpp +++ b/modules/imgproc/src/shapedescr.cpp @@ -60,6 +60,29 @@ static void findCircle3pts(Point2f *pts, Point2f &center, float &radius) Point2f midPoint2 = (pts[0] + pts[2]) / 2.0f; float c2 = midPoint2.x * v2.x + midPoint2.y * v2.y; float det = v1.x * v2.y - v1.y * v2.x; + if (fabs(det) <= EPS) + { + // v1 and v2 are colinear, so the longest distance between any 2 points + // is the diameter of the minimum enclosing circle. 
+ float d1 = normL2Sqr(pts[0] - pts[1]); + float d2 = normL2Sqr(pts[0] - pts[2]); + float d3 = normL2Sqr(pts[1] - pts[2]); + radius = sqrt(std::max(d1, std::max(d2, d3))) * 0.5f + EPS; + if (d1 >= d2 && d1 >= d3) + { + center = (pts[0] + pts[1]) * 0.5f; + } + else if (d2 >= d1 && d2 >= d3) + { + center = (pts[0] + pts[2]) * 0.5f; + } + else + { + CV_DbgAssert(d3 >= d1 && d3 >= d2); + center = (pts[1] + pts[2]) * 0.5f; + } + return; + } float cx = (c1 * v2.y - c2 * v1.y) / det; float cy = (v1.x * c2 - v2.x * c1) / det; center.x = (float)cx; @@ -92,7 +115,13 @@ static void findThirdPoint(const PT *pts, int i, int j, Point2f &center, float & ptsf[0] = (Point2f)pts[i]; ptsf[1] = (Point2f)pts[j]; ptsf[2] = (Point2f)pts[k]; - findCircle3pts(ptsf, center, radius); + Point2f new_center; float new_radius = 0; + findCircle3pts(ptsf, new_center, new_radius); + if (new_radius > 0) + { + radius = new_radius; + center = new_center; + } } } } @@ -117,7 +146,13 @@ void findSecondPoint(const PT *pts, int i, Point2f &center, float &radius) } else { - findThirdPoint(pts, i, j, center, radius); + Point2f new_center; float new_radius = 0; + findThirdPoint(pts, i, j, new_center, new_radius); + if (new_radius > 0) + { + radius = new_radius; + center = new_center; + } } } } @@ -143,7 +178,13 @@ static void findMinEnclosingCircle(const PT *pts, int count, Point2f &center, fl } else { - findSecondPoint(pts, i, center, radius); + Point2f new_center; float new_radius = 0; + findSecondPoint(pts, i, new_center, new_radius); + if (new_radius > 0) + { + radius = new_radius; + center = new_center; + } } } } diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp index 3f12140328..fc29b7fbb5 100644 --- a/modules/imgproc/test/test_convhull.cpp +++ b/modules/imgproc/test/test_convhull.cpp @@ -1084,6 +1084,87 @@ int CV_MinCircleTest2::validate_test_results( int test_case_idx ) return code; } 
+/****************************************************************************************\ +* minEnclosingCircle Test 3 * +\****************************************************************************************/ + +TEST(Imgproc_minEnclosingCircle, basic_test) +{ + vector pts; + pts.push_back(Point2f(0, 0)); + pts.push_back(Point2f(10, 0)); + pts.push_back(Point2f(5, 1)); + const float EPS = 1.0e-3f; + Point2f center; + float radius; + + // pts[2] is within the circle with diameter pts[0] - pts[1]. + // 2 + // 0 1 + // NB: The triangle is obtuse, so only pts[0] and pts[1] are on the circle. + minEnclosingCircle(pts, center, radius); + EXPECT_NEAR(center.x, 5, EPS); + EXPECT_NEAR(center.y, 0, EPS); + EXPECT_NEAR(5, radius, EPS); + + // pts[2] is on the circle with diameter pts[0] - pts[1]. + // 2 + // 0 1 + pts[2] = Point2f(5, 5); + minEnclosingCircle(pts, center, radius); + EXPECT_NEAR(center.x, 5, EPS); + EXPECT_NEAR(center.y, 0, EPS); + EXPECT_NEAR(5, radius, EPS); + + // pts[2] is outside the circle with diameter pts[0] - pts[1]. + // 2 + // + // + // 0 1 + // NB: The triangle is acute, so all 3 points are on the circle. + pts[2] = Point2f(5, 10); + minEnclosingCircle(pts, center, radius); + EXPECT_NEAR(center.x, 5, EPS); + EXPECT_NEAR(center.y, 3.75, EPS); + EXPECT_NEAR(6.25f, radius, EPS); + + // The 3 points are colinear. + pts[2] = Point2f(3, 0); + minEnclosingCircle(pts, center, radius); + EXPECT_NEAR(center.x, 5, EPS); + EXPECT_NEAR(center.y, 0, EPS); + EXPECT_NEAR(5, radius, EPS); + + // 2 points are the same. + pts[2] = pts[1]; + minEnclosingCircle(pts, center, radius); + EXPECT_NEAR(center.x, 5, EPS); + EXPECT_NEAR(center.y, 0, EPS); + EXPECT_NEAR(5, radius, EPS); + + // 3 points are the same. 
+ pts[0] = pts[1]; + minEnclosingCircle(pts, center, radius); + EXPECT_NEAR(center.x, 10, EPS); + EXPECT_NEAR(center.y, 0, EPS); + EXPECT_NEAR(0, radius, EPS); +} + +TEST(Imgproc_minEnclosingCircle, regression_16051) { + vector pts; + pts.push_back(Point2f(85, 1415)); + pts.push_back(Point2f(87, 1415)); + pts.push_back(Point2f(89, 1414)); + pts.push_back(Point2f(89, 1414)); + pts.push_back(Point2f(87, 1412)); + Point2f center; + float radius; + minEnclosingCircle(pts, center, radius); + EXPECT_NEAR(center.x, 86.9f, 1e-3); + EXPECT_NEAR(center.y, 1414.1f, 1e-3); + EXPECT_NEAR(2.1024551f, radius, 1e-3); +} + /****************************************************************************************\ * Perimeter Test * \****************************************************************************************/ From aa80f754f403bb1e8e3248e201bab090f64cc401 Mon Sep 17 00:00:00 2001 From: antalzsiroscandid Date: Wed, 27 Nov 2019 15:31:38 +0100 Subject: [PATCH 04/11] dnn: reading IR models from buffer --- modules/dnn/include/opencv2/dnn/dnn.hpp | 47 ++++++++- modules/dnn/src/dnn.cpp | 128 +++++++++++++++++++++--- modules/dnn/test/test_misc.cpp | 54 ++++++++++ 3 files changed, 211 insertions(+), 18 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index a4bbffcf41..94e2ada3f1 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -384,7 +384,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_WRAP Net(); //!< Default constructor. CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore. - /** @brief Create a network from Intel's Model Optimizer intermediate representation. + /** @brief Create a network from Intel's Model Optimizer intermediate representation (IR). * @param[in] xml XML configuration file with network's topology. * @param[in] bin Binary file with trained weights. 
* Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine @@ -392,6 +392,25 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN */ CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin); + /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). + * @param[in] bufferModelConfig buffer with model's configuration. + * @param[in] bufferWeights buffer with model's trained weights. + * @returns Net object. + */ + CV_WRAP static + Net readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights); + + /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). + * @param[in] bufferModelConfigPtr buffer pointer of model's configuration. + * @param[in] bufferModelConfigSize buffer size of model's configuration. + * @param[in] bufferWeightsPtr buffer pointer of model's trained weights. + * @param[in] bufferWeightsSize buffer size of model's trained weights. + * @returns Net object. + */ + static + Net readFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize); + /** Returns true if there are no layers in the network. */ CV_WRAP bool empty() const; @@ -857,7 +876,31 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine * backend. */ - CV_EXPORTS_W Net readNetFromModelOptimizer(const String &xml, const String &bin); + CV_EXPORTS_W + Net readNetFromModelOptimizer(const String &xml, const String &bin); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] bufferModelConfig Buffer contains XML configuration with network's topology. + * @param[in] bufferWeights Buffer contains binary data with trained weights. + * @returns Net object. 
+ * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_EXPORTS_W + Net readNetFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] bufferModelConfigPtr Pointer to buffer which contains XML configuration with network's topology. + * @param[in] bufferModelConfigSize Binary size of XML configuration data. + * @param[in] bufferWeightsPtr Pointer to buffer which contains binary data with trained weights. + * @param[in] bufferWeightsSize Binary size of trained weights data. + * @returns Net object. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_EXPORTS + Net readNetFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize); /** @brief Reads a network model ONNX. * @param onnxFile path to the .onnx file with text description of the network architecture. 
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index ad2e52766b..e2c296bc03 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2910,28 +2910,22 @@ struct Net::Impl return getBlobAsync(getPinByAlias(outputName)); } #endif // CV_CXX11 + +#ifdef HAVE_INF_ENGINE + static + Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet); +#endif }; Net::Net() : impl(new Net::Impl) { } -Net Net::readFromModelOptimizer(const String& xml, const String& bin) +#ifdef HAVE_INF_ENGINE +/*static*/ +Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet) { -#ifndef HAVE_INF_ENGINE - CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); -#else - -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) - InferenceEngine::CNNNetReader reader; - reader.ReadNetwork(xml); - reader.ReadWeights(bin); - - InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); -#else - InferenceEngine::Core& ie = getCore(); - InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); -#endif + CV_TRACE_FUNCTION(); std::vector inputsNames; for (auto& it : ieNet.getInputsInfo()) @@ -3001,9 +2995,95 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin) cvNet.impl->skipInfEngineInit = true; return cvNet; +} +#endif // HAVE_INF_ENGINE + +Net Net::readFromModelOptimizer(const String& xml, const String& bin) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(xml); CV_UNUSED(bin); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else +#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) + InferenceEngine::CNNNetReader reader; + reader.ReadNetwork(xml); + reader.ReadWeights(bin); + + InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); +#else + InferenceEngine::Core& ie = getCore(); + InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); +#endif + + return 
Impl::createNetworkFromModelOptimizer(ieNet); #endif // HAVE_INF_ENGINE } +Net Net::readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights) +{ + CV_TRACE_FUNCTION(); + CV_Assert(!bufferModelConfig.empty()); + CV_Assert(!bufferWeights.empty()); + return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(), + bufferWeights.data(), bufferWeights.size()); +} + +Net Net::readFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize +) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr); + CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferWeightsSize); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else + +#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) + InferenceEngine::CNNNetReader reader; + + try + { + reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize); + + InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); + InferenceEngine::TBlob::Ptr weightsBlobPtr(new InferenceEngine::TBlob(tensorDesc)); + weightsBlobPtr->allocate(); + std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize); + reader.SetWeights(weightsBlobPtr); + } + catch (const std::exception& e) + { + CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); + } + + InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); +#else + InferenceEngine::Core& ie = getCore(); + + std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize); + + InferenceEngine::CNNNetwork ieNet; + try + { + InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); + InferenceEngine::Blob::CPtr weights_blob = 
InferenceEngine::make_shared_blob(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize); + + ieNet = ie.ReadNetwork(model, weights_blob); + } + catch (const std::exception& e) + { + CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); + } +#endif + + return Impl::createNetworkFromModelOptimizer(ieNet); +#endif // HAVE_INF_ENGINE +} + + Net::~Net() { } @@ -4344,7 +4424,7 @@ Net readNet(const String& _framework, const std::vector& bufferModel, else if (framework == "torch") CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); else if (framework == "dldt") - CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers"); + return readNetFromModelOptimizer(bufferConfig, bufferModel); CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); } @@ -4353,5 +4433,21 @@ Net readNetFromModelOptimizer(const String &xml, const String &bin) return Net::readFromModelOptimizer(xml, bin); } +Net readNetFromModelOptimizer(const std::vector& bufferCfg, const std::vector& bufferModel) +{ + return Net::readFromModelOptimizer(bufferCfg, bufferModel); +} + +Net readNetFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize +) +{ + return Net::readFromModelOptimizer( + bufferModelConfigPtr, bufferModelConfigSize, + bufferWeightsPtr, bufferWeightsSize + ); +} + CV__DNN_EXPERIMENTAL_NS_END }} // namespace diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 464ef104b3..2069b97419 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -637,6 +637,60 @@ TEST_P(Test_Model_Optimizer, forward_two_nets) normAssert(ref0, ref2, 0, 0); } + +TEST_P(Test_Model_Optimizer, readFromBuffer) +{ + const Backend backendId = get<0>(GetParam()); + const Target targetId = get<1>(GetParam()); + + if (backendId != 
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + throw SkipTestException("Inference Engine backend is required"); + + const std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : ""; + const std::string& weightsFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); + const std::string& modelFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml"); + + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); + else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); + else + FAIL() << "Unknown backendId"; + + Net net1 = readNetFromModelOptimizer(modelFile, weightsFile); + net1.setPreferableBackend(backendId); + net1.setPreferableTarget(targetId); + + + std::vector modelConfig; + readFileContent(modelFile, modelConfig); + std::vector weights; + readFileContent(weightsFile, weights); + + Net net2 = readNetFromModelOptimizer( + (const uchar*)modelConfig.data(), modelConfig.size(), + (const uchar*)weights.data(), weights.size() + ); + net2.setPreferableBackend(backendId); + net2.setPreferableTarget(targetId); + + int blobSize[] = {2, 6, 75, 113}; + Mat input(4, &blobSize[0], CV_32F); + randu(input, 0, 255); + + Mat ref, actual; + { + net1.setInput(input); + ref = net1.forward(); + } + { + net2.setInput(input); + actual = net2.forward(); + } + + normAssert(ref, actual, "", 0, 0); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Model_Optimizer, dnnBackendsAndTargetsIE() ); From f5a84f75c4427e0754138264dbce0b55a80d5d38 Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Wed, 18 Dec 2019 20:05:36 +0300 Subject: [PATCH 05/11] Fix for CV_8UC2 linear resize vectorization --- modules/imgproc/src/resize.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/imgproc/src/resize.cpp 
b/modules/imgproc/src/resize.cpp index df127d1c43..cc967cf469 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -1605,13 +1605,14 @@ struct HResizeLinearVecU8_X4 for( dx = 0; dx < len0; dx += step ) { + int ofs[4] = { xofs[dx], xofs[dx + 2], xofs[dx + 4], xofs[dx + 6] }; v_int16x8 al = v_load(alpha+dx*2); v_int16x8 ah = v_load(alpha+dx*2+8); v_uint16x8 sl, sh; - v_expand(v_interleave_pairs(v_lut_quads(S0, xofs+dx)), sl, sh); + v_expand(v_interleave_pairs(v_lut_quads(S0, ofs)), sl, sh); v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(sl), al)); v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah)); - v_expand(v_interleave_pairs(v_lut_pairs(S1, xofs+dx)), sl, sh); + v_expand(v_interleave_pairs(v_lut_quads(S1, ofs)), sl, sh); v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(sl), al)); v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah)); } @@ -1622,10 +1623,11 @@ struct HResizeLinearVecU8_X4 int *D = dst[k]; for( dx = 0; dx < len0; dx += step ) { + int ofs[4] = { xofs[dx], xofs[dx + 2], xofs[dx + 4], xofs[dx + 6] }; v_int16x8 al = v_load(alpha+dx*2); v_int16x8 ah = v_load(alpha+dx*2+8); v_uint16x8 sl, sh; - v_expand(v_interleave_pairs(v_lut_quads(S, xofs+dx)), sl, sh); + v_expand(v_interleave_pairs(v_lut_quads(S, ofs)), sl, sh); v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(sl), al)); v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah)); } From 28a5f7d66bed5b562cba38bf588dc573c26c24f5 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 18 Dec 2019 23:14:38 +0000 Subject: [PATCH 06/11] 3rdparty: TBB version 2019u8 => 2020.0 --- 3rdparty/tbb/CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt index a3c0a812ce..67eae7dc0a 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -5,10 +5,11 @@ if (WIN32 AND NOT ARM) message(FATAL_ERROR "BUILD_TBB option supports Windows on ARM only!\nUse regular 
official TBB build instead of the BUILD_TBB option!") endif() -ocv_update(OPENCV_TBB_RELEASE "2019_U8") -ocv_update(OPENCV_TBB_RELEASE_MD5 "7c371d0f62726154d2c568a85697a0ad") +ocv_update(OPENCV_TBB_RELEASE "v2020.0") +ocv_update(OPENCV_TBB_RELEASE_MD5 "5858dd01ec007c139d5d178b21e06dae") ocv_update(OPENCV_TBB_FILENAME "${OPENCV_TBB_RELEASE}.tar.gz") -ocv_update(OPENCV_TBB_SUBDIR "tbb-${OPENCV_TBB_RELEASE}") +string(REGEX REPLACE "^v" "" OPENCV_TBB_RELEASE_ "${OPENCV_TBB_RELEASE}") +ocv_update(OPENCV_TBB_SUBDIR "tbb-${OPENCV_TBB_RELEASE_}") set(tbb_src_dir "${OpenCV_BINARY_DIR}/3rdparty/tbb") ocv_download(FILENAME ${OPENCV_TBB_FILENAME} @@ -34,10 +35,12 @@ ocv_include_directories("${tbb_src_dir}/include" file(GLOB lib_srcs "${tbb_src_dir}/src/tbb/*.cpp") file(GLOB lib_hdrs "${tbb_src_dir}/src/tbb/*.h") list(APPEND lib_srcs "${tbb_src_dir}/src/rml/client/rml_tbb.cpp") +ocv_list_filterout(lib_srcs "${tbb_src_dir}/src/tbb/tbbbind.cpp") # hwloc.h requirement if (WIN32) add_definitions(/D__TBB_DYNAMIC_LOAD_ENABLED=0 /D__TBB_BUILD=1 + /DTBB_SUPPRESS_DEPRECATED_MESSAGES=1 /DTBB_NO_LEGACY=1 /D_UNICODE /DUNICODE From 8d22ac200f488eb76d86cdf6ad12581df2980095 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 11 Dec 2019 20:08:10 +0000 Subject: [PATCH 07/11] core: workaround flipHoriz() alignment issues --- modules/core/include/opencv2/core/utility.hpp | 37 +++++++++ modules/core/src/copy.cpp | 77 ++++++++++++++++--- 2 files changed, 102 insertions(+), 12 deletions(-) diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index e7f169b01a..063747e730 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -514,6 +514,43 @@ static inline size_t roundUp(size_t a, unsigned int b) return a + b - 1 - (a + b - 1) % b; } +/** @brief Alignment check of passed values + +Usage: `isAligned(...)` + +@note Alignment(N) must be a power of 2 (2**k, 2^k) +*/ +template static 
inline +bool isAligned(const T& data) +{ + CV_StaticAssert((N & (N - 1)) == 0, ""); // power of 2 + return (((size_t)data) & (N - 1)) == 0; +} +/** @overload */ +template static inline +bool isAligned(const void* p1) +{ + return isAligned((size_t)p1); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2) +{ + return isAligned(((size_t)p1)|((size_t)p2)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4)); +} + /** @brief Enables or disables the optimized code. The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2, diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 3f68a2555a..3fa498286a 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -563,6 +563,12 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask) return *this; } +#if CV_NEON && !defined(__aarch64__) +#define CV_CHECK_ALIGNMENT 1 +#else +#define CV_CHECK_ALIGNMENT 0 +#endif + #if CV_SIMD128 template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { @@ -572,6 +578,10 @@ template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s int width_1 = width & -v_uint8x16::nlanes; int i, j; +#if CV_CHECK_ALIGNMENT + CV_Assert(isAligned(src, dst)); +#endif + for( ; size.height--; src += sstep, dst += dstep ) { for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) @@ -585,7 +595,7 @@ template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s v_store((T*)(dst + j - v_uint8x16::nlanes), t0); v_store((T*)(dst + i), t1); } - if 
(((size_t)src|(size_t)dst) % sizeof(T) == 0) + if (isAligned(src, dst)) { for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) { @@ -620,6 +630,11 @@ template CV_ALWAYS_INLINE void flipHoriz_double( const int end = (int)(size.width*esz); int width = (end + 1)/2; +#if CV_CHECK_ALIGNMENT + CV_Assert(isAligned(src, dst)); + CV_Assert(isAligned(src, dst)); +#endif + for( ; size.height--; src += sstep, dst += dstep ) { for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) ) @@ -644,6 +659,9 @@ static void flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { #if CV_SIMD +#if CV_CHECK_ALIGNMENT + size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; +#endif if (esz == 2 * v_uint8x16::nlanes) { int end = (int)(size.width*esz); @@ -693,15 +711,27 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, } } } - else if (esz == 8) + else if (esz == 8 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { flipHoriz_single(src, sstep, dst, dstep, size, esz); } - else if (esz == 4) + else if (esz == 4 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { flipHoriz_single(src, sstep, dst, dstep, size, esz); } - else if (esz == 2) + else if (esz == 2 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { flipHoriz_single(src, sstep, dst, dstep, size, esz); } @@ -709,7 +739,11 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, { flipHoriz_single(src, sstep, dst, dstep, size, esz); } - else if (esz == 24) + else if (esz == 24 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { int end = (int)(size.width*esz); int width = (end + 1)/2; @@ -732,6 +766,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, } } } +#if !CV_CHECK_ALIGNMENT else if (esz == 12) { flipHoriz_double(src, sstep, dst, dstep, size, esz); @@ -744,8 +779,9 @@ flipHoriz( const uchar* 
src, size_t sstep, uchar* dst, size_t dstep, Size size, { flipHoriz_double(src, sstep, dst, dstep, size, esz); } - else #endif + else +#endif // CV_SIMD { int i, j, limit = (int)(((size.width + 1)/2)*esz); AutoBuffer _tab(size.width*esz); @@ -779,16 +815,33 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, { int i = 0; #if CV_SIMD - for( ; i <= size.width - (v_int32::nlanes * 4); i += v_int32::nlanes * 4 ) +#if CV_CHECK_ALIGNMENT + if (isAligned(src0, src1, dst0, dst1)) +#endif { - v_int32 t0 = vx_load((int*)(src0 + i)); - v_int32 t1 = vx_load((int*)(src1 + i)); - vx_store((int*)(dst0 + i), t1); - vx_store((int*)(dst1 + i), t0); + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_int32 t0 = vx_load((int*)(src0 + i)); + v_int32 t1 = vx_load((int*)(src1 + i)); + vx_store((int*)(dst0 + i), t1); + vx_store((int*)(dst1 + i), t0); + } } +#if CV_CHECK_ALIGNMENT + else + { + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_uint8 t0 = vx_load(src0 + i); + v_uint8 t1 = vx_load(src1 + i); + vx_store(dst0 + i, t1); + vx_store(dst1 + i, t0); + } + } +#endif #endif - if( ((size_t)src0|(size_t)dst0|(size_t)src1|(size_t)dst1) % sizeof(int) == 0 ) + if (isAligned(src0, src1, dst0, dst1)) { for( ; i <= size.width - 16; i += 16 ) { From 9cd1d087c38dfa41d4606b53a40496871ae4e875 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 19 Dec 2019 00:29:53 +0000 Subject: [PATCH 08/11] android(camera2): apply .disconnectCamera() patch from issue 13574 --- .../java/org/opencv/android/JavaCamera2View.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java index 09e01b01fc..5eac339c12 100644 --- a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java +++ 
b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java @@ -230,7 +230,7 @@ public class JavaCamera2View extends CameraBridgeViewBase { @Override protected void disconnectCamera() { - Log.i(LOGTAG, "closeCamera"); + Log.i(LOGTAG, "close camera"); try { CameraDevice c = mCameraDevice; mCameraDevice = null; @@ -241,13 +241,14 @@ public class JavaCamera2View extends CameraBridgeViewBase { if (null != c) { c.close(); } + } finally { + stopBackgroundThread(); if (null != mImageReader) { mImageReader.close(); mImageReader = null; } - } finally { - stopBackgroundThread(); } + Log.i(LOGTAG, "camera closed!"); } public static class JavaCameraSizeAccessor implements ListItemAccessor { From e801f0e954a4d095b0240c0d83cfb5376a6f0e85 Mon Sep 17 00:00:00 2001 From: Sebastien Wybo Date: Thu, 19 Dec 2019 10:59:18 +0100 Subject: [PATCH 09/11] Merge pull request #16011 from sebastien-wybo:fix_16007 * Fix #16007 - colinearity computed using all 3 coordinates * calib3d(test): estimateAffine3D regression 16007 --- modules/calib3d/src/ptsetreg.cpp | 6 +++--- modules/calib3d/test/test_affine3d_estimator.cpp | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modules/calib3d/src/ptsetreg.cpp b/modules/calib3d/src/ptsetreg.cpp index 88e1815c35..9b2ec7857d 100644 --- a/modules/calib3d/src/ptsetreg.cpp +++ b/modules/calib3d/src/ptsetreg.cpp @@ -488,13 +488,13 @@ public: for(j = 0; j < i; ++j) { Point3f d1 = ptr[j] - ptr[i]; - float n1 = d1.x*d1.x + d1.y*d1.y; + float n1 = d1.x*d1.x + d1.y*d1.y + d1.z*d1.z; for(k = 0; k < j; ++k) { Point3f d2 = ptr[k] - ptr[i]; - float denom = (d2.x*d2.x + d2.y*d2.y)*n1; - float num = d1.x*d2.x + d1.y*d2.y; + float denom = (d2.x*d2.x + d2.y*d2.y + d2.z*d2.z)*n1; + float num = d1.x*d2.x + d1.y*d2.y + d1.z*d2.z; if( num*num > threshold*threshold*denom ) return false; diff --git a/modules/calib3d/test/test_affine3d_estimator.cpp b/modules/calib3d/test/test_affine3d_estimator.cpp index 9c3821bbc4..dba09afa40 
100644 --- a/modules/calib3d/test/test_affine3d_estimator.cpp +++ b/modules/calib3d/test/test_affine3d_estimator.cpp @@ -192,4 +192,18 @@ void CV_Affine3D_EstTest::run( int /* start_from */) TEST(Calib3d_EstimateAffine3D, accuracy) { CV_Affine3D_EstTest test; test.safe_run(); } +TEST(Calib3d_EstimateAffine3D, regression_16007) +{ + std::vector m1, m2; + m1.push_back(Point3f(1.0f, 0.0f, 0.0f)); m2.push_back(Point3f(1.0f, 1.0f, 0.0f)); + m1.push_back(Point3f(1.0f, 0.0f, 1.0f)); m2.push_back(Point3f(1.0f, 1.0f, 1.0f)); + m1.push_back(Point3f(0.5f, 0.0f, 0.5f)); m2.push_back(Point3f(0.5f, 1.0f, 0.5f)); + m1.push_back(Point3f(2.5f, 0.0f, 2.5f)); m2.push_back(Point3f(2.5f, 1.0f, 2.5f)); + m1.push_back(Point3f(2.0f, 0.0f, 1.0f)); m2.push_back(Point3f(2.0f, 1.0f, 1.0f)); + + cv::Mat m3D, inl; + int res = cv::estimateAffine3D(m1, m2, m3D, inl); + EXPECT_EQ(1, res); +} + }} // namespace From 5bf73457431b7d2cb87ac8c107865388dbf66642 Mon Sep 17 00:00:00 2001 From: jeffeDurand Date: Thu, 19 Dec 2019 05:02:48 -0500 Subject: [PATCH 10/11] Merge pull request #16090 from jeffeDurand:cuda_mog2_issue_5296 * cuda_mog2_issue_5296 --- modules/cudabgsegm/src/cuda/mog2.cu | 581 +++++++++++++-------------- modules/cudabgsegm/src/cuda/mog2.hpp | 37 ++ modules/cudabgsegm/src/mog2.cpp | 354 ++++++++-------- 3 files changed, 493 insertions(+), 479 deletions(-) create mode 100644 modules/cudabgsegm/src/cuda/mog2.hpp diff --git a/modules/cudabgsegm/src/cuda/mog2.cu b/modules/cudabgsegm/src/cuda/mog2.cu index 789afa47a9..46891c688f 100644 --- a/modules/cudabgsegm/src/cuda/mog2.cu +++ b/modules/cudabgsegm/src/cuda/mog2.cu @@ -47,393 +47,372 @@ #include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/limits.hpp" -namespace cv { namespace cuda { namespace device -{ - namespace mog2 - { - /////////////////////////////////////////////////////////////// - // Utility - - __device__ __forceinline__ float cvt(uchar val) - { - return val; - } - __device__ __forceinline__ float3 cvt(const 
uchar3& val) - { - return make_float3(val.x, val.y, val.z); - } - __device__ __forceinline__ float4 cvt(const uchar4& val) - { - return make_float4(val.x, val.y, val.z, val.w); - } - - __device__ __forceinline__ float sqr(float val) - { - return val * val; - } - __device__ __forceinline__ float sqr(const float3& val) - { - return val.x * val.x + val.y * val.y + val.z * val.z; - } - __device__ __forceinline__ float sqr(const float4& val) - { - return val.x * val.x + val.y * val.y + val.z * val.z; - } +#include "mog2.hpp" - __device__ __forceinline__ float sum(float val) - { - return val; - } - __device__ __forceinline__ float sum(const float3& val) - { - return val.x + val.y + val.z; - } - __device__ __forceinline__ float sum(const float4& val) - { - return val.x + val.y + val.z; - } - - template - __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows) - { - typename Ptr2D::elem_type val = ptr(k * rows + y, x); - ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); - ptr((k + 1) * rows + y, x) = val; - } - - /////////////////////////////////////////////////////////////// - // MOG2 +namespace cv +{ +namespace cuda +{ +namespace device +{ +namespace mog2 +{ +/////////////////////////////////////////////////////////////// +// Utility - __constant__ int c_nmixtures; - __constant__ float c_Tb; - __constant__ float c_TB; - __constant__ float c_Tg; - __constant__ float c_varInit; - __constant__ float c_varMin; - __constant__ float c_varMax; - __constant__ float c_tau; - __constant__ unsigned char c_shadowVal; +__device__ __forceinline__ float cvt(uchar val) +{ + return val; +} +__device__ __forceinline__ float3 cvt(const uchar3 &val) +{ + return make_float3(val.x, val.y, val.z); +} +__device__ __forceinline__ float4 cvt(const uchar4 &val) +{ + return make_float4(val.x, val.y, val.z, val.w); +} - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal) - { - varMin = 
::fminf(varMin, varMax); - varMax = ::fmaxf(varMin, varMax); - - cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) ); - } +__device__ __forceinline__ float sqr(float val) +{ + return val * val; +} +__device__ __forceinline__ float sqr(const float3 &val) +{ + return val.x * val.x + val.y * val.y + val.z * val.z; +} +__device__ __forceinline__ float sqr(const float4 &val) +{ + return val.x * val.x + val.y * val.y + val.z * val.z; +} - template - __global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, - PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, - const float alphaT, const float alpha1, const float prune) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; +__device__ __forceinline__ float sum(float val) +{ + return val; +} +__device__ __forceinline__ float sum(const float3 &val) +{ + return val.x + val.y + val.z; +} +__device__ __forceinline__ float sum(const float4 &val) +{ + return val.x + val.y + val.z; +} - if (x >= frame.cols || y >= frame.rows) - return; +template +__device__ __forceinline__ void swap(Ptr2D &ptr, int x, int y, int k, int rows) +{ + typename Ptr2D::elem_type val = ptr(k * rows + y, x); + ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); + ptr((k + 1) * rows + y, x) = val; +} + +/////////////////////////////////////////////////////////////// +// MOG2 + +template 
+__global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, + PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, + const float alphaT, const float alpha1, const float prune, const Constants *const constants) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - WorkT pix = cvt(frame(y, x)); + if (x < frame.cols && y < frame.rows) + { + WorkT pix = cvt(frame(y, x)); - //calculate distances to the modes (+ sort) - //here we need to go in descending order!!! + //calculate distances to the modes (+ sort) + //here we need to go in descending order!!! - bool background = false; // true - the pixel classified as background + bool background = false; // true - the pixel classified as background - //internal: + //internal: - bool fitsPDF = false; //if it remains zero a new GMM mode will be added + bool fitsPDF = false; //if it remains zero a new GMM mode will be added - int nmodes = modesUsed(y, x); - int nNewModes = nmodes; //current number of modes in GMM + int nmodes = modesUsed(y, x); + const int nNewModes = nmodes; //current number of modes in GMM - float totalWeight = 0.0f; + float totalWeight = 0.0f; - //go through all modes + //go through all modes - for (int mode = 0; mode < nmodes; ++mode) + for (int mode = 0; mode < nmodes; ++mode) + { + //need only weight if fit is found + float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; + int swap_count = 0; + //fit not found yet + if (!fitsPDF) { - //need only weight if fit is found - float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; - int swap_count = 0; - //fit not found yet - if (!fitsPDF) - { - //check if it belongs to some of the remaining modes - float var = gmm_variance(mode * frame.rows + y, x); - - WorkT mean = gmm_mean(mode * frame.rows + y, x); + //check if it belongs to some of the remaining modes + const float var = gmm_variance(mode * frame.rows + y, x); - //calculate difference 
and distance - WorkT diff = mean - pix; - float dist2 = sqr(diff); + const WorkT mean = gmm_mean(mode * frame.rows + y, x); - //background? - Tb - usually larger than Tg - if (totalWeight < c_TB && dist2 < c_Tb * var) - background = true; + //calculate difference and distance + const WorkT diff = mean - pix; + const float dist2 = sqr(diff); - //check fit - if (dist2 < c_Tg * var) - { - //belongs to the mode - fitsPDF = true; + //background? - Tb - usually larger than Tg + if (totalWeight < constants->TB_ && dist2 < constants->Tb_ * var) + background = true; - //update distribution + //check fit + if (dist2 < constants->Tg_ * var) + { + //belongs to the mode + fitsPDF = true; - //update weight - weight += alphaT; - float k = alphaT / weight; + //update distribution - //update mean - gmm_mean(mode * frame.rows + y, x) = mean - k * diff; + //update weight + weight += alphaT; + float k = alphaT / weight; - //update variance - float varnew = var + k * (dist2 - var); + //update mean + gmm_mean(mode * frame.rows + y, x) = mean - k * diff; - //limit the variance - varnew = ::fmaxf(varnew, c_varMin); - varnew = ::fminf(varnew, c_varMax); + //update variance + float varnew = var + k * (dist2 - var); - gmm_variance(mode * frame.rows + y, x) = varnew; + //limit the variance + varnew = ::fmaxf(varnew, constants->varMin_); + varnew = ::fminf(varnew, constants->varMax_); - //sort - //all other weights are at the same place and - //only the matched (iModes) is higher -> just find the new place for it + gmm_variance(mode * frame.rows + y, x) = varnew; - for (int i = mode; i > 0; --i) - { - //check one up - if (weight < gmm_weight((i - 1) * frame.rows + y, x)) - break; + //sort + //all other weights are at the same place and + //only the matched (iModes) is higher -> just find the new place for it - swap_count++; - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } + for (int i = 
mode; i > 0; --i) + { + //check one up + if (weight < gmm_weight((i - 1) * frame.rows + y, x)) + break; - //belongs to the mode - bFitsPDF becomes 1 + swap_count++; + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); } - } // !fitsPDF - //check prune - if (weight < -prune) - { - weight = 0.0f; - nmodes--; + //belongs to the mode - bFitsPDF becomes 1 } + } // !fitsPDF - gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value - totalWeight += weight; + //check prune + if (weight < -prune) + { + weight = 0.0f; + nmodes--; } - //renormalize weights + gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value + totalWeight += weight; + } - totalWeight = 1.f / totalWeight; - for (int mode = 0; mode < nmodes; ++mode) - gmm_weight(mode * frame.rows + y, x) *= totalWeight; + //renormalize weights - nmodes = nNewModes; + totalWeight = 1.f / totalWeight; + for (int mode = 0; mode < nmodes; ++mode) + gmm_weight(mode * frame.rows + y, x) *= totalWeight; - //make new mode if needed and exit + nmodes = nNewModes; - if (!fitsPDF) - { - // replace the weakest or add a new one - int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++; + //make new mode if needed and exit - if (nmodes == 1) - gmm_weight(mode * frame.rows + y, x) = 1.f; - else - { - gmm_weight(mode * frame.rows + y, x) = alphaT; + if (!fitsPDF) + { + // replace the weakest or add a new one + const int mode = nmodes == constants->nmixtures_ ? 
constants->nmixtures_ - 1 : nmodes++; - // renormalize all other weights + if (nmodes == 1) + gmm_weight(mode * frame.rows + y, x) = 1.f; + else + { + gmm_weight(mode * frame.rows + y, x) = alphaT; - for (int i = 0; i < nmodes - 1; ++i) - gmm_weight(i * frame.rows + y, x) *= alpha1; - } + // renormalize all other weights - // init + for (int i = 0; i < nmodes - 1; ++i) + gmm_weight(i * frame.rows + y, x) *= alpha1; + } - gmm_mean(mode * frame.rows + y, x) = pix; - gmm_variance(mode * frame.rows + y, x) = c_varInit; + // init - //sort - //find the new place for it + gmm_mean(mode * frame.rows + y, x) = pix; + gmm_variance(mode * frame.rows + y, x) = constants->varInit_; - for (int i = nmodes - 1; i > 0; --i) - { - // check one up - if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) - break; + //sort + //find the new place for it - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } + for (int i = nmodes - 1; i > 0; --i) + { + // check one up + if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) + break; + + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); } + } - //set the number of modes - modesUsed(y, x) = nmodes; + //set the number of modes + modesUsed(y, x) = nmodes; - bool isShadow = false; - if (detectShadows && !background) - { - float tWeight = 0.0f; + bool isShadow = false; + if (detectShadows && !background) + { + float tWeight = 0.0f; - // check all the components marked as background: - for (int mode = 0; mode < nmodes; ++mode) - { - WorkT mean = gmm_mean(mode * frame.rows + y, x); + // check all the components marked as background: + for (int mode = 0; mode < nmodes; ++mode) + { + const WorkT mean = gmm_mean(mode * frame.rows + y, x); - WorkT pix_mean = pix * mean; + const WorkT pix_mean = pix * mean; - float numerator = sum(pix_mean); - float 
denominator = sqr(mean); + const float numerator = sum(pix_mean); + const float denominator = sqr(mean); - // no division by zero allowed - if (denominator == 0) - break; - - // if tau < a < 1 then also check the color distortion - if (numerator <= denominator && numerator >= c_tau * denominator) - { - float a = numerator / denominator; + // no division by zero allowed + if (denominator == 0) + break; - WorkT dD = a * mean - pix; + // if tau < a < 1 then also check the color distortion + else if (numerator <= denominator && numerator >= constants->tau_ * denominator) + { + const float a = numerator / denominator; - if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a) - { - isShadow = true; - break; - } - }; + WorkT dD = a * mean - pix; - tWeight += gmm_weight(mode * frame.rows + y, x); - if (tWeight > c_TB) + if (sqr(dD) < constants->Tb_ * gmm_variance(mode * frame.rows + y, x) * a * a) + { + isShadow = true; break; - } - } + } + }; - fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; + tWeight += gmm_weight(mode * frame.rows + y, x); + if (tWeight > constants->TB_) + break; + } } - template - void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - - const float alpha1 = 1.0f - alphaT; + fgmask(y, x) = background ? 0 : isShadow ? 
constants->shadowVal_ : 255; + } +} - if (detectShadows) - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); +template +void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream) +{ + dim3 block(32, 8); + dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } - else - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); + const float alpha1 = 1.0f - alphaT; - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } + if (detectShadows) + { + cudaSafeCall(cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1)); - cudaSafeCall( cudaGetLastError() ); + mog2<<>>((PtrStepSz)frame, fgmask, modesUsed, + weight, variance, (PtrStepSz)mean, + alphaT, alpha1, prune, constants); + } + else + { + cudaSafeCall(cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1)); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + mog2<<>>((PtrStepSz)frame, fgmask, modesUsed, + weight, variance, (PtrStepSz)mean, + alphaT, alpha1, prune, constants); + } - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + cudaSafeCall(cudaGetLastError()); - static const func_t funcs[] = - { - 0, mog2_caller, 0, mog2_caller, mog2_caller - }; + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); +} - funcs[cn](frame, fgmask, 
modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream); - } +void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream) +{ + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream); - template - __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst) + static const func_t funcs[] = { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; + 0, mog2_caller, 0, mog2_caller, mog2_caller}; - if (x >= modesUsed.cols || y >= modesUsed.rows) - return; + funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, constants, stream); +} - int nmodes = modesUsed(y, x); +template +__global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst, const Constants *const constants) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - WorkT meanVal = VecTraits::all(0.0f); - float totalWeight = 0.0f; + if (x >= modesUsed.cols || y >= modesUsed.rows) + return; - for (int mode = 0; mode < nmodes; ++mode) - { - float weight = gmm_weight(mode * modesUsed.rows + y, x); + int nmodes = modesUsed(y, x); - WorkT mean = gmm_mean(mode * modesUsed.rows + y, x); - meanVal = meanVal + weight * mean; + WorkT meanVal = VecTraits::all(0.0f); + float totalWeight = 0.0f; - totalWeight += weight; + for (int mode = 0; mode < nmodes; ++mode) + { + float weight = gmm_weight(mode * modesUsed.rows + y, x); - if(totalWeight > c_TB) - break; - } + WorkT 
mean = gmm_mean(mode * modesUsed.rows + y, x); + meanVal = meanVal + weight * mean; - meanVal = meanVal * (1.f / totalWeight); + totalWeight += weight; - dst(y, x) = saturate_cast(meanVal); - } + if (totalWeight > constants->TB_) + break; + } - template - void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); + meanVal = meanVal * (1.f / totalWeight); - cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1) ); + dst(y, x) = saturate_cast(meanVal); +} - getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz) mean, (PtrStepSz) dst); - cudaSafeCall( cudaGetLastError() ); +template +void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream) +{ + dim3 block(32, 8); + dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + cudaSafeCall(cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1)); - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); + getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz)mean, (PtrStepSz)dst, constants); + cudaSafeCall(cudaGetLastError()); - static const func_t funcs[] = - { - 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller - }; + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); +} - funcs[cn](modesUsed, weight, mean, dst, stream); - } - } -}}} +void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t 
stream) +{ + typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream); + static const func_t funcs[] = + { + 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller}; + + funcs[cn](modesUsed, weight, mean, dst, constants, stream); +} +} // namespace mog2 +} // namespace device +} // namespace cuda +} // namespace cv #endif /* CUDA_DISABLER */ diff --git a/modules/cudabgsegm/src/cuda/mog2.hpp b/modules/cudabgsegm/src/cuda/mog2.hpp new file mode 100644 index 0000000000..5b2155195f --- /dev/null +++ b/modules/cudabgsegm/src/cuda/mog2.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CUDA_MOG2_H +#define OPENCV_CUDA_MOG2_H + +#include "opencv2/core/cuda.hpp" + +struct CUstream_st; +typedef struct CUstream_st *cudaStream_t; + +namespace cv { namespace cuda { + +class Stream; + +namespace device { namespace mog2 { + +typedef struct +{ + float Tb_; + float TB_; + float Tg_; + float varInit_; + float varMin_; + float varMax_; + float tau_; + int nmixtures_; + unsigned char shadowVal_; +} Constants; + +void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream); +void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream); + +} } } } + +#endif /* OPENCV_CUDA_MOG2_H */ diff --git a/modules/cudabgsegm/src/mog2.cpp b/modules/cudabgsegm/src/mog2.cpp index e727dcfdaf..47135a088b 100644 --- a/modules/cudabgsegm/src/mog2.cpp +++ b/modules/cudabgsegm/src/mog2.cpp @@ -41,209 +41,207 @@ //M*/ 
#include "precomp.hpp" +#include "cuda/mog2.hpp" using namespace cv; using namespace cv::cuda; +using namespace cv::cuda::device::mog2; #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -Ptr cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr(); } +Ptr cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) +{ + throw_no_cuda(); + return Ptr(); +} #else -namespace cv { namespace cuda { namespace device +namespace { - namespace mog2 - { - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal); - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); - } -}}} +// default parameters of gaussian background detection algorithm +const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 +const float defaultVarThreshold = 4.0f * 4.0f; +const int defaultNMixtures = 5; // maximal number of Gaussians in mixture +const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test +const float defaultVarThresholdGen = 3.0f * 3.0f; +const float defaultVarInit = 15.0f; // initial variance for new components +const float defaultVarMax = 5.0f * defaultVarInit; +const float defaultVarMin = 4.0f; + +// additional parameters +const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components +const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection +const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation + +class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2 +{ 
+public: + MOG2Impl(int history, double varThreshold, bool detectShadows); + ~MOG2Impl(); -namespace + void apply(InputArray image, OutputArray fgmask, double learningRate = -1) CV_OVERRIDE; + void apply(InputArray image, OutputArray fgmask, double learningRate, Stream &stream) CV_OVERRIDE; + + void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE; + void getBackgroundImage(OutputArray backgroundImage, Stream &stream) const CV_OVERRIDE; + + int getHistory() const CV_OVERRIDE { return history_; } + void setHistory(int history) CV_OVERRIDE { history_ = history; } + + int getNMixtures() const CV_OVERRIDE { return constantsHost_.nmixtures_; } + void setNMixtures(int nmixtures) CV_OVERRIDE { constantsHost_.nmixtures_ = nmixtures; } + + double getBackgroundRatio() const CV_OVERRIDE { return constantsHost_.TB_; } + void setBackgroundRatio(double ratio) CV_OVERRIDE { constantsHost_.TB_ = (float)ratio; } + + double getVarThreshold() const CV_OVERRIDE { return constantsHost_.Tb_; } + void setVarThreshold(double varThreshold) CV_OVERRIDE { constantsHost_.Tb_ = (float)varThreshold; } + + double getVarThresholdGen() const CV_OVERRIDE { return constantsHost_.Tg_; } + void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { constantsHost_.Tg_ = (float)varThresholdGen; } + + double getVarInit() const CV_OVERRIDE { return constantsHost_.varInit_; } + void setVarInit(double varInit) CV_OVERRIDE { constantsHost_.varInit_ = (float)varInit; } + + double getVarMin() const CV_OVERRIDE { return constantsHost_.varMin_; } + void setVarMin(double varMin) CV_OVERRIDE { constantsHost_.varMin_ = ::fminf((float)varMin, constantsHost_.varMax_); } + + double getVarMax() const CV_OVERRIDE { return constantsHost_.varMax_; } + void setVarMax(double varMax) CV_OVERRIDE { constantsHost_.varMax_ = ::fmaxf(constantsHost_.varMin_, (float)varMax); } + + double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; } + void setComplexityReductionThreshold(double ct) 
CV_OVERRIDE { ct_ = (float)ct; } + + bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; } + void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; } + + int getShadowValue() const CV_OVERRIDE { return constantsHost_.shadowVal_; } + void setShadowValue(int value) CV_OVERRIDE { constantsHost_.shadowVal_ = (uchar)value; } + + double getShadowThreshold() const CV_OVERRIDE { return constantsHost_.tau_; } + void setShadowThreshold(double threshold) CV_OVERRIDE { constantsHost_.tau_ = (float)threshold; } + +private: + void initialize(Size frameSize, int frameType, Stream &stream); + + Constants constantsHost_; + Constants *constantsDevice_; + + int history_; + float ct_; + bool detectShadows_; + + Size frameSize_; + int frameType_; + int nframes_; + + GpuMat weight_; + GpuMat variance_; + GpuMat mean_; + + //keep track of number of modes per pixel + GpuMat bgmodelUsedModes_; +}; + +MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : frameSize_(0, 0), frameType_(0), nframes_(0) +{ + history_ = history > 0 ? history : defaultHistory; + detectShadows_ = detectShadows; + ct_ = defaultCT; + + setNMixtures(defaultNMixtures); + setBackgroundRatio(defaultBackgroundRatio); + setVarInit(defaultVarInit); + setVarMin(defaultVarMin); + setVarMax(defaultVarMax); + setVarThreshold(varThreshold > 0 ? 
(float)varThreshold : defaultVarThreshold); + setVarThresholdGen(defaultVarThresholdGen); + + setShadowValue(defaultShadowValue); + setShadowThreshold(defaultShadowThreshold); + + cudaSafeCall(cudaMalloc((void **)&constantsDevice_, sizeof(Constants))); +} + +MOG2Impl::~MOG2Impl() +{ + cudaFree(constantsDevice_); +} + +void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate) +{ + apply(image, fgmask, learningRate, Stream::Null()); +} + +void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream &stream) { - // default parameters of gaussian background detection algorithm - const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 - const float defaultVarThreshold = 4.0f * 4.0f; - const int defaultNMixtures = 5; // maximal number of Gaussians in mixture - const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test - const float defaultVarThresholdGen = 3.0f * 3.0f; - const float defaultVarInit = 15.0f; // initial variance for new components - const float defaultVarMax = 5.0f * defaultVarInit; - const float defaultVarMin = 4.0f; - - // additional parameters - const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components - const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection - const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation - - class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2 - { - public: - MOG2Impl(int history, double varThreshold, bool detectShadows); - - void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE; - void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) CV_OVERRIDE; - - void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE; - void getBackgroundImage(OutputArray backgroundImage, 
Stream& stream) const CV_OVERRIDE; - - int getHistory() const CV_OVERRIDE { return history_; } - void setHistory(int history) CV_OVERRIDE { history_ = history; } - - int getNMixtures() const CV_OVERRIDE { return nmixtures_; } - void setNMixtures(int nmixtures) CV_OVERRIDE { nmixtures_ = nmixtures; } - - double getBackgroundRatio() const CV_OVERRIDE { return backgroundRatio_; } - void setBackgroundRatio(double ratio) CV_OVERRIDE { backgroundRatio_ = (float) ratio; } - - double getVarThreshold() const CV_OVERRIDE { return varThreshold_; } - void setVarThreshold(double varThreshold) CV_OVERRIDE { varThreshold_ = (float) varThreshold; } - - double getVarThresholdGen() const CV_OVERRIDE { return varThresholdGen_; } - void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { varThresholdGen_ = (float) varThresholdGen; } - - double getVarInit() const CV_OVERRIDE { return varInit_; } - void setVarInit(double varInit) CV_OVERRIDE { varInit_ = (float) varInit; } - - double getVarMin() const CV_OVERRIDE { return varMin_; } - void setVarMin(double varMin) CV_OVERRIDE { varMin_ = (float) varMin; } - - double getVarMax() const CV_OVERRIDE { return varMax_; } - void setVarMax(double varMax) CV_OVERRIDE { varMax_ = (float) varMax; } - - double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; } - void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float) ct; } - - bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; } - void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; } - - int getShadowValue() const CV_OVERRIDE { return shadowValue_; } - void setShadowValue(int value) CV_OVERRIDE { shadowValue_ = (uchar) value; } + using namespace cv::cuda::device::mog2; + + GpuMat frame = _frame.getGpuMat(); - double getShadowThreshold() const CV_OVERRIDE { return shadowThreshold_; } - void setShadowThreshold(double threshold) CV_OVERRIDE { shadowThreshold_ = (float) threshold; } - - private: - void 
initialize(Size frameSize, int frameType); - - int history_; - int nmixtures_; - float backgroundRatio_; - float varThreshold_; - float varThresholdGen_; - float varInit_; - float varMin_; - float varMax_; - float ct_; - bool detectShadows_; - uchar shadowValue_; - float shadowThreshold_; - - Size frameSize_; - int frameType_; - int nframes_; - - GpuMat weight_; - GpuMat variance_; - GpuMat mean_; - - //keep track of number of modes per pixel - GpuMat bgmodelUsedModes_; - }; - - MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : - frameSize_(0, 0), frameType_(0), nframes_(0) - { - history_ = history > 0 ? history : defaultHistory; - varThreshold_ = varThreshold > 0 ? (float) varThreshold : defaultVarThreshold; - detectShadows_ = detectShadows; - - nmixtures_ = defaultNMixtures; - backgroundRatio_ = defaultBackgroundRatio; - varInit_ = defaultVarInit; - varMax_ = defaultVarMax; - varMin_ = defaultVarMin; - varThresholdGen_ = defaultVarThresholdGen; - ct_ = defaultCT; - shadowValue_ = defaultShadowValue; - shadowThreshold_ = defaultShadowThreshold; - } + int ch = frame.channels(); + int work_ch = ch; - void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate) - { - apply(image, fgmask, learningRate, Stream::Null()); - } + if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels()) + initialize(frame.size(), frame.type(), stream); - void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream) - { - using namespace cv::cuda::device::mog2; + _fgmask.create(frameSize_, CV_8UC1); + GpuMat fgmask = _fgmask.getGpuMat(); - GpuMat frame = _frame.getGpuMat(); + fgmask.setTo(Scalar::all(0), stream); - int ch = frame.channels(); - int work_ch = ch; + ++nframes_; + learningRate = learningRate >= 0 && nframes_ > 1 ? 
learningRate : 1.0 / std::min(2 * nframes_, history_); + CV_Assert(learningRate >= 0); - if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels()) - initialize(frame.size(), frame.type()); + mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, + (float)learningRate, static_cast<float>(-learningRate * ct_), detectShadows_, constantsDevice_, StreamAccessor::getStream(stream)); +} + +void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const +{ + getBackgroundImage(backgroundImage, Stream::Null()); +} + +void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream &stream) const +{ + using namespace cv::cuda::device::mog2; + + _backgroundImage.create(frameSize_, frameType_); + GpuMat backgroundImage = _backgroundImage.getGpuMat(); + + getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, constantsDevice_, StreamAccessor::getStream(stream)); +} + +void MOG2Impl::initialize(cv::Size frameSize, int frameType, Stream &stream) +{ + using namespace cv::cuda::device::mog2; - _fgmask.create(frameSize_, CV_8UC1); - GpuMat fgmask = _fgmask.getGpuMat(); + CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4); - fgmask.setTo(Scalar::all(0), stream); + frameSize_ = frameSize; + frameType_ = frameType; + nframes_ = 0; - ++nframes_; - learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_); - CV_Assert( learningRate >= 0 ); + const int ch = CV_MAT_CN(frameType); + const int work_ch = ch; - mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, - (float) learningRate, static_cast<float>(-learningRate * ct_), detectShadows_, StreamAccessor::getStream(stream)); - } + // for each gaussian mixture of each pixel bg model we store ...
+ // the mixture weight (w), + // the mean (nchannels values) and + // the covariance + weight_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1); + variance_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1); + mean_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC(work_ch)); - void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const - { - getBackgroundImage(backgroundImage, Stream::Null()); - } - - void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const - { - using namespace cv::cuda::device::mog2; - - _backgroundImage.create(frameSize_, frameType_); - GpuMat backgroundImage = _backgroundImage.getGpuMat(); - - getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream)); - } - - void MOG2Impl::initialize(cv::Size frameSize, int frameType) - { - using namespace cv::cuda::device::mog2; - - CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 ); - - frameSize_ = frameSize; - frameType_ = frameType; - nframes_ = 0; - - int ch = CV_MAT_CN(frameType); - int work_ch = ch; - - // for each gaussian mixture of each pixel bg model we store ... 
- // the mixture weight (w), - // the mean (nchannels values) and - // the covariance - weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); - - //make the array for keeping track of the used modes per pixel - all zeros at start - bgmodelUsedModes_.create(frameSize_, CV_8UC1); - bgmodelUsedModes_.setTo(Scalar::all(0)); + //make the array for keeping track of the used modes per pixel - all zeros at start + bgmodelUsedModes_.create(frameSize_, CV_8UC1); + bgmodelUsedModes_.setTo(Scalar::all(0)); - loadConstants(nmixtures_, varThreshold_, backgroundRatio_, varThresholdGen_, varInit_, varMin_, varMax_, shadowThreshold_, shadowValue_); - } + cudaSafeCall(cudaMemcpyAsync(constantsDevice_, &constantsHost_, sizeof(Constants), cudaMemcpyHostToDevice, StreamAccessor::getStream(stream))); } +} // namespace Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows) { From 4733a19babec760ba237b8c277bb1de664a641c1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 19 Dec 2019 13:20:42 +0300 Subject: [PATCH 11/11] Merge pull request #16194 from alalek:fix_16192 * imgproc(test): resize(LANCZOS4) reproducer 16192 * imgproc: fix resize LANCZOS4 coefficients generation --- modules/imgproc/src/resize.cpp | 23 +++++++++++++---------- modules/imgproc/test/test_imgwarp.cpp | 13 +++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index cc967cf469..02f78819de 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -920,20 +920,23 @@ static inline void interpolateLanczos4( float x, float* coeffs ) static const double cs[][2]= {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}}; - if( x < FLT_EPSILON ) - { - for(
int i = 0; i < 8; i++ ) - coeffs[i] = 0; - coeffs[3] = 1; - return; - } - float sum = 0; double y0=-(x+3)*CV_PI*0.25, s0 = std::sin(y0), c0= std::cos(y0); for(int i = 0; i < 8; i++ ) { - double y = -(x+3-i)*CV_PI*0.25; - coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y)); + float y0_ = (x+3-i); + if (fabs(y0_) >= 1e-6f) + { + double y = -y0_*CV_PI*0.25; + coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y)); + } + else + { + // special handling for 'x' values: + // - ~0.0: 0 0 0 1 0 0 0 0 + // - ~1.0: 0 0 0 0 1 0 0 0 + coeffs[i] = 1e30f; + } sum += coeffs[i]; } diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp index 400426af15..232f374548 100644 --- a/modules/imgproc/test/test_imgwarp.cpp +++ b/modules/imgproc/test/test_imgwarp.cpp @@ -1708,6 +1708,19 @@ TEST(Resize, Area_half) } } +TEST(Resize, lanczos4_regression_16192) +{ + Size src_size(11, 17); + Size dst_size(11, 153); + Mat src(src_size, CV_8UC3, Scalar::all(128)); + Mat dst(dst_size, CV_8UC3, Scalar::all(255)); + + cv::resize(src, dst, dst_size, 0, 0, INTER_LANCZOS4); + + Mat expected(dst_size, CV_8UC3, Scalar::all(128)); + EXPECT_EQ(cvtest::norm(dst, expected, NORM_INF), 0) << dst(Rect(0,0,8,8)); +} + TEST(Imgproc_Warp, multichannel) { static const int inter_types[] = {INTER_NEAREST, INTER_AREA, INTER_CUBIC,