diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 65029ddbff..fae91c165f 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -1624,7 +1624,7 @@ endif() macro(ocv_git_describe var_name path) if(GIT_FOUND) - execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --tags --exact-match --dirty + execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --exact-match --dirty WORKING_DIRECTORY "${path}" OUTPUT_VARIABLE ${var_name} RESULT_VARIABLE GIT_RESULT diff --git a/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown b/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown index a46f578feb..e04ed4ee1a 100644 --- a/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown +++ b/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown @@ -16,42 +16,152 @@ Theory Code ---- +@add_toggle_cpp This tutorial code's is shown lines below. You can also download it from - [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp). +[here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp). @include samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp +@end_toggle + +@add_toggle_java +This tutorial code's is shown lines below. You can also download it from +[here](https://github.com/opencv/opencv/tree/master/samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java) +@include samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java +@end_toggle + +@add_toggle_python +This tutorial code's is shown lines below. 
You can also download it from +[here](https://github.com/opencv/opencv/tree/master/samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py) +@include samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py +@end_toggle Explanation / Result -------------------- --# Load the source image and check if it is loaded without any problem, then show it: - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp load_image - ![](images/source.jpeg) +- Load the source image and check if it is loaded without any problem, then show it: + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp load_image +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java load_image +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py load_image +@end_toggle + +![](images/source.jpeg) + +- Then if we have an image with a white background, it is good to transform it to black. This will help us to discriminate the foreground objects easier when we will apply the Distance Transform: + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp black_bg +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java black_bg +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py black_bg +@end_toggle + +![](images/black_bg.jpeg) + +- Afterwards we will sharpen our image in order to acute the edges of the foreground objects. 
We will apply a laplacian filter with a quite strong filter (an approximation of second derivative): + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp sharp +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java sharp +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py sharp +@end_toggle + +![](images/laplace.jpeg) +![](images/sharp.jpeg) + +- Now we transform our new sharpened source image to a grayscale and a binary one, respectively: + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp bin +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java bin +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py bin +@end_toggle + +![](images/bin.jpeg) + +- We are ready now to apply the Distance Transform on the binary image. Moreover, we normalize the output image in order to be able visualize and threshold the result: + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp dist +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java dist +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py dist +@end_toggle + +![](images/dist_transf.jpeg) + +- We threshold the *dist* image and then perform some morphology operation (i.e. 
dilation) in order to extract the peaks from the above image: + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp peaks +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java peaks +@end_toggle + +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py peaks +@end_toggle + +![](images/peaks.jpeg) + +- From each blob then we create a seed/marker for the watershed algorithm with the help of the @ref cv::findContours function: + +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp seeds +@end_toggle + +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java seeds +@end_toggle --# Then if we have an image with a white background, it is good to transform it to black. This will help us to discriminate the foreground objects easier when we will apply the Distance Transform: - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp black_bg - ![](images/black_bg.jpeg) +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py seeds +@end_toggle --# Afterwards we will sharpen our image in order to acute the edges of the foreground objects. We will apply a laplacian filter with a quite strong filter (an approximation of second derivative): - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp sharp - ![](images/laplace.jpeg) - ![](images/sharp.jpeg) +![](images/markers.jpeg) --# Now we transform our new sharpened source image to a grayscale and a binary one, respectively: - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp bin - ![](images/bin.jpeg) +- Finally, we can apply the watershed algorithm, and visualize the result: --# We are ready now to apply the Distance Transform on the binary image. 
Moreover, we normalize the output image in order to be able visualize and threshold the result: - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp dist - ![](images/dist_transf.jpeg) +@add_toggle_cpp +@snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp watershed +@end_toggle --# We threshold the *dist* image and then perform some morphology operation (i.e. dilation) in order to extract the peaks from the above image: - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp peaks - ![](images/peaks.jpeg) +@add_toggle_java +@snippet samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java watershed +@end_toggle --# From each blob then we create a seed/marker for the watershed algorithm with the help of the @ref cv::findContours function: - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp seeds - ![](images/markers.jpeg) +@add_toggle_python +@snippet samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py watershed +@end_toggle --# Finally, we can apply the watershed algorithm, and visualize the result: - @snippet samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp watershed - ![](images/final.jpeg) \ No newline at end of file +![](images/final.jpeg) diff --git a/doc/tutorials/imgproc/table_of_content_imgproc.markdown b/doc/tutorials/imgproc/table_of_content_imgproc.markdown index e3fac55924..59c985e1dd 100644 --- a/doc/tutorials/imgproc/table_of_content_imgproc.markdown +++ b/doc/tutorials/imgproc/table_of_content_imgproc.markdown @@ -285,6 +285,8 @@ In this section you will learn about the image processing (manipulation) functio - @subpage tutorial_distance_transform + *Languages:* C++, Java, Python + *Compatibility:* \> OpenCV 2.0 *Author:* Theodore Tsesmelis diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 83a707337f..7fd0b6af1e 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp 
+++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -1985,6 +1985,31 @@ CV_EXPORTS_W int decomposeHomographyMat(InputArray H, OutputArrayOfArrays translations, OutputArrayOfArrays normals); +/** @brief Filters homography decompositions based on additional information. + +@param rotations Vector of rotation matrices. +@param normals Vector of plane normal matrices. +@param beforePoints Vector of (rectified) visible reference points before the homography is applied +@param afterPoints Vector of (rectified) visible reference points after the homography is applied +@param possibleSolutions Vector of int indices representing the viable solution set after filtering +@param pointsMask optional Mat/Vector of 8u type representing the mask for the inliers as given by the findHomography function + +This function is intended to filter the output of the decomposeHomographyMat based on additional +information as described in @cite Malis . The summary of the method: the decomposeHomographyMat function +returns 2 unique solutions and their "opposites" for a total of 4 solutions. If we have access to the +sets of points visible in the camera frame before and after the homography transformation is applied, +we can determine which are the true potential solutions and which are the opposites by verifying which +homographies are consistent with all visible reference points being in front of the camera. The inputs +are left unchanged; the filtered solution set is returned as indices into the existing one. + +*/ +CV_EXPORTS_W void filterHomographyDecompByVisibleRefpoints(InputArrayOfArrays rotations, + InputArrayOfArrays normals, + InputArray beforePoints, + InputArray afterPoints, + OutputArray possibleSolutions, + InputArray pointsMask = noArray()); + /** @brief The base class for stereo correspondence algorithms. 
*/ class CV_EXPORTS_W StereoMatcher : public Algorithm diff --git a/modules/calib3d/src/homography_decomp.cpp b/modules/calib3d/src/homography_decomp.cpp index 252da7967b..6975a7ef11 100644 --- a/modules/calib3d/src/homography_decomp.cpp +++ b/modules/calib3d/src/homography_decomp.cpp @@ -1,50 +1,51 @@ /*M/////////////////////////////////////////////////////////////////////////////////////// - // - // This is a homography decomposition implementation contributed to OpenCV - // by Samson Yilma. It implements the homography decomposition algorithm - // described in the research report: - // Malis, E and Vargas, M, "Deeper understanding of the homography decomposition - // for vision-based control", Research Report 6303, INRIA (2007) - // - // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. - // - // By downloading, copying, installing or using the software you agree to this license. - // If you do not agree to this license, do not download, install, - // copy or use the software. - // - // - // License Agreement - // For Open Source Computer Vision Library - // - // Copyright (C) 2014, Samson Yilma (samson_yilma@yahoo.com), all rights reserved. - // - // Third party copyrights are property of their respective owners. - // - // Redistribution and use in source and binary forms, with or without modification, - // are permitted provided that the following conditions are met: - // - // * Redistribution's of source code must retain the above copyright notice, - // this list of conditions and the following disclaimer. - // - // * Redistribution's in binary form must reproduce the above copyright notice, - // this list of conditions and the following disclaimer in the documentation - // and/or other materials provided with the distribution. - // - // * The name of the copyright holders may not be used to endorse or promote products - // derived from this software without specific prior written permission. 
- // - // This software is provided by the copyright holders and contributors "as is" and - // any express or implied warranties, including, but not limited to, the implied - // warranties of merchantability and fitness for a particular purpose are disclaimed. - // In no event shall the Intel Corporation or contributors be liable for any direct, - // indirect, incidental, special, exemplary, or consequential damages - // (including, but not limited to, procurement of substitute goods or services; - // loss of use, data, or profits; or business interruption) however caused - // and on any theory of liability, whether in contract, strict liability, - // or tort (including negligence or otherwise) arising in any way out of - // the use of this software, even if advised of the possibility of such damage. - // - //M*/ +// +// This is a homography decomposition implementation contributed to OpenCV +// by Samson Yilma. It implements the homography decomposition algorithm +// described in the research report: +// Malis, E and Vargas, M, "Deeper understanding of the homography decomposition +// for vision-based control", Research Report 6303, INRIA (2007) +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2014, Samson Yilma (samson_yilma@yahoo.com), all rights reserved. +// Copyright (C) 2018, Intel Corporation, all rights reserved. +// +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ #include "precomp.hpp" #include @@ -489,4 +490,67 @@ int decomposeHomographyMat(InputArray _H, return nsols; } +void filterHomographyDecompByVisibleRefpoints(InputArrayOfArrays _rotations, + InputArrayOfArrays _normals, + InputArray _beforeRectifiedPoints, + InputArray _afterRectifiedPoints, + OutputArray _possibleSolutions, + InputArray _pointsMask) +{ + CV_Assert(_beforeRectifiedPoints.type() == CV_32FC2 && _afterRectifiedPoints.type() == CV_32FC2); + CV_Assert(_pointsMask.empty() || _pointsMask.type() == CV_8U); + + Mat beforeRectifiedPoints = _beforeRectifiedPoints.getMat(); + Mat afterRectifiedPoints = _afterRectifiedPoints.getMat(); + Mat pointsMask = _pointsMask.getMat(); + int nsolutions = (int)_rotations.total(); + int npoints = (int)beforeRectifiedPoints.total(); + CV_Assert(pointsMask.empty() || pointsMask.checkVector(1, CV_8U) == npoints); + const uchar* pointsMaskPtr = pointsMask.data; + + std::vector solutionMask(nsolutions, (uchar)1); + std::vector normals(nsolutions); + std::vector rotnorm(nsolutions); + Mat R; + + for( int i = 0; i < nsolutions; i++ ) + { + _normals.getMat(i).convertTo(normals[i], CV_64F); + CV_Assert(normals[i].total() == 3); + _rotations.getMat(i).convertTo(R, CV_64F); + rotnorm[i] = R*normals[i]; + CV_Assert(rotnorm[i].total() == 3); + } + + for( int j = 0; j < npoints; j++ ) + { + if( !pointsMaskPtr || pointsMaskPtr[j] ) + { + Point2f prevPoint = beforeRectifiedPoints.at(j); + Point2f currPoint = afterRectifiedPoints.at(j); + + for( int i = 0; i < nsolutions; i++ ) + { + if( !solutionMask[i] ) + continue; + + const double* normal_i = normals[i].ptr(); + const double* rotnorm_i = rotnorm[i].ptr(); + double prevNormDot = normal_i[0]*prevPoint.x + normal_i[1]*prevPoint.y + normal_i[2]; + double currNormDot = rotnorm_i[0]*currPoint.x + rotnorm_i[1]*currPoint.y + rotnorm_i[2]; + + if (prevNormDot <= 0 || currNormDot <= 0) + solutionMask[i] = (uchar)0; + } + } + } + + std::vector possibleSolutions; + for( int i = 0; i < 
nsolutions; i++ ) + if( solutionMask[i] ) + possibleSolutions.push_back(i); + + Mat(possibleSolutions).copyTo(_possibleSolutions); +} + } //namespace cv diff --git a/modules/calib3d/test/test_filter_homography_decomp.cpp b/modules/calib3d/test/test_filter_homography_decomp.cpp new file mode 100644 index 0000000000..533d9ddf56 --- /dev/null +++ b/modules/calib3d/test/test_filter_homography_decomp.cpp @@ -0,0 +1,575 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2016, OpenCV Foundation, all rights reserved. +// +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/calib3d.hpp" + +namespace opencv_test { namespace { + +class CV_FilterHomographyDecompTest : public cvtest::BaseTest { + +public: + CV_FilterHomographyDecompTest() + { + buildTestDataSet(); + } + +protected: + void run(int) + { + vector finalSolutions; + filterHomographyDecompByVisibleRefpoints(_rotations, _normals, _prevRectifiedPoints, _currRectifiedPoints, finalSolutions, _mask); + + //there should be at least 2 solution + ASSERT_EQ(finalSolutions, _validSolutions); + } + +private: + + void buildTestDataSet() + { + double rotationsArray[4][9] = { + { + 0.98811084196540500, + -0.15276633082836735, + 0.017303530150126534, + 0.14161851662094097, + 0.94821044891315664, + 0.28432576443578628, + -0.059842791884259422, + -0.27849487021693553, + 0.95857156619751127 + }, + { + 0.98811084196540500, + -0.15276633082836735, + 0.017303530150126534, + 0.14161851662094097, + 0.94821044891315664, + 0.28432576443578628, + -0.059842791884259422, + -0.27849487021693553, + 0.95857156619751127 + }, + { + 0.95471096402077438, + -0.21080808634428211, + -0.20996886890771557, + 0.20702063153797226, + 0.97751379914116743, + -0.040115216641822840, + 0.21370407880090386, + -0.0051694506925720751, + 0.97688476468997820 + }, + { + 0.95471096402077438, + -0.21080808634428211, + -0.20996886890771557, + 0.20702063153797226, + 0.97751379914116743, + -0.040115216641822840, + 0.21370407880090386, + 
-0.0051694506925720751, + 0.97688476468997820 + } + }; + + double normalsArray[4][3] = { + { + -0.023560516110791116, + 0.085818414407956692, + 0.99603217911325403 + }, + { + 0.023560516110791116, + -0.085818414407956692, + -0.99603217911325403 + }, + { + -0.62483547397726014, + -0.56011861446691769, + 0.54391889853844289 + }, + { + 0.62483547397726014, + 0.56011861446691769, + -0.54391889853844289 + } + }; + + uchar maskArray[514] = + { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, + 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, + 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, + 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, + 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, + 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, + 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0 + }; + + static const float currRectifiedPointArr[] = + { + -0.565732896f, -0.321162999f, -0.416198403f, -0.299646467f, -0.408354312f, -0.290387660f, + -0.386555284f, -0.287677139f, -0.348475337f, -0.276208878f, -0.415957332f, -0.266133875f, + -0.354961902f, -0.257545590f, -0.420189440f, -0.255190015f, -0.379785866f, -0.252570540f, + -0.144345313f, -0.249134675f, -0.162417486f, -0.223227784f, -0.129876539f, -0.219722182f, + -0.470801264f, -0.211166814f, 0.0992607549f, -0.209064797f, 0.123508267f, -0.196303099f, + -0.521849990f, -0.190706849f, -0.513497114f, -0.189186409f, -0.534959674f, -0.185138911f, + 0.121614374f, -0.182721153f, 0.154205695f, -0.183763996f, -0.516449869f, -0.181606859f, + -0.523427486f, -0.180088669f, 0.149494573f, -0.179563865f, -0.552187204f, -0.172630817f, + -0.322249800f, -0.172333881f, 0.127574071f, -0.165683150f, 0.159817487f, -0.162389070f, + -0.578930736f, -0.160272732f, -0.600617707f, -0.155920163f, -0.249115735f, -0.154711768f, + -0.543279886f, -0.144873798f, -0.529992998f, -0.142433196f, 0.0554505363f, -0.142878756f, + -0.613355398f, -0.132748783f, 0.190059289f, -0.128930226f, -0.255682647f, -0.127393380f, + 0.0299431719f, -0.125339776f, -0.282943249f, -0.118550651f, -0.0348402821f, -0.115398556f, + -0.0362741761f, -0.110100254f, -0.319089264f, -0.104354575f, -0.0401916653f, -0.0852083191f, + -0.372183621f, -0.0812346712f, -0.00707253255f, -0.0810251758f, 0.267345309f, -0.0787685066f, + 0.258760840f, -0.0768160895f, -0.377273679f, -0.0763452053f, -0.0314898677f, -0.0743160769f, + 0.223423928f, -0.0724818707f, 0.00284322398f, -0.0720727518f, 0.232531011f, -0.0682833865f, + 0.282355100f, -0.0655683428f, -0.233353317f, -0.0613981225f, 0.290982842f, -0.0607336313f, + -0.0994169787f, -0.0376472026f, 0.257561266f, -0.0331368558f, 0.265076399f, -0.0320781991f, + 0.0454338901f, -0.0238198638f, 0.0409987904f, -0.0186991505f, -0.502306283f, -0.0172236171f, + -0.464807063f, -0.0149533665f, -0.185798749f, -0.00540314987f, 
0.182073534f, -0.000651287497f, + -0.435764432f, 0.00162558386f, -0.181552932f, 0.00792864431f, -0.700565279f, 0.0110246018f, + -0.144087434f, 0.0120453080f, -0.524990261f, 0.0138590708f, -0.182723984f, 0.0165519360f, + -0.217308879f, 0.0208590515f, 0.462978750f, 0.0247372910f, 0.0956632495f, 0.0323494300f, + 0.0843820646f, 0.0424364135f, 0.122466311f, 0.0441578403f, -0.162433729f, 0.0528083183f, + 0.0964344442f, 0.0624147579f, -0.271349967f, 0.0727724135f, -0.266336441f, 0.0719895661f, + 0.0675768778f, 0.0848240927f, -0.689944625f, 0.0889045894f, -0.680990934f, 0.0903657600f, + -0.119472280f, 0.0930491239f, -0.124393739f, 0.0933082998f, -0.323403478f, 0.0937438533f, + -0.323273063f, 0.0969979763f, -0.352427900f, 0.101048596f, -0.327554941f, 0.104539163f, + -0.330044419f, 0.114519835f, 0.0235135648f, 0.118004657f, -0.671623945f, 0.130437061f, + -0.385111898f, 0.142786101f, -0.376281500f, 0.145800456f, -0.0169987213f, 0.148056105f, + -0.326495141f, 0.152596891f, -0.337120056f, 0.154522225f, -0.336885720f, 0.154304653f, + 0.322089493f, 0.155130088f, -0.0713477954f, 0.163638428f, -0.0208650175f, 0.171433330f, + -0.380652726f, 0.172022790f, -0.0599780641f, 0.182294667f, 0.244408697f, 0.194245726f, + -0.101454332f, 0.198159069f, 0.257901788f, 0.200226694f, -0.0775909275f, 0.205242962f, + 0.231870517f, 0.222396746f, -0.546760798f, 0.242291704f, -0.538914979f, 0.243761152f, + 0.206653103f, 0.244874880f, -0.595693469f, 0.264329463f, -0.581023335f, 0.265664101f, + 0.00444878871f, 0.267031074f, -0.573156178f, 0.271591753f, -0.543381274f, 0.271759123f, + 0.00450209389f, 0.271335930f, -0.223618075f, 0.278416723f, 0.161934286f, 0.289435983f, + -0.199636295f, 0.296817899f, -0.250217140f, 0.299677849f, -0.258231103f, 0.314012855f, + -0.628315628f, 0.316889286f, 0.320948511f, 0.316358119f, -0.246845752f, 0.320511192f, + 0.0687271580f, 0.321383297f, 0.0784438103f, 0.322898388f, 0.0946765989f, 0.325111747f, + -0.249674007f, 0.328731328f, -0.244633347f, 0.329467386f, -0.245841011f, 
0.334985316f, + 0.118609101f, 0.343532443f, 0.0497615598f, 0.348162144f, -0.221477821f, 0.349263757f, + 0.0759577379f, 0.351840734f, 0.0504637137f, 0.373238713f, 0.0730970055f, 0.376537383f, + -0.204333842f, 0.381100655f, -0.557245076f, -0.339432925f, -0.402010202f, -0.288829565f, + -0.350465477f, -0.281259984f, -0.352995187f, -0.264569730f, -0.466762394f, -0.217114508f, + 0.152002022f, -0.217566550f, 0.146226048f, -0.183914393f, 0.0949312001f, -0.177005857f, + -0.211882949f, -0.175594494f, -0.531562269f, -0.173924312f, -0.0727246776f, -0.167270422f, + 0.0546481088f, -0.140193000f, -0.296819001f, -0.137850702f, -0.261863053f, -0.139540121f, + 0.187967837f, -0.131033540f, 0.322852045f, -0.112108752f, -0.0432251953f, -0.102951847f, + -0.0453428440f, -0.0914504975f, -0.0182842426f, -0.0918859020f, 0.0140433423f, -0.0904538929f, + -0.377287626f, -0.0817026496f, 0.266108125f, -0.0797783583f, 0.257961422f, -0.0767710134f, + -0.495943695f, -0.0683977529f, 0.231466040f, -0.0675206482f, -0.240675926f, -0.0551427566f, + -0.482824773f, -0.0510699376f, -0.491354793f, -0.0414650664f, -0.0960614979f, -0.0377000235f, + -0.102409534f, -0.0369749814f, -0.471273214f, -0.0325376652f, -0.483320534f, -0.0174943600f, + -0.457503378f, -0.0152483145f, -0.178161725f, -0.0153892851f, -0.483233035f, -0.0106405178f, + -0.472914547f, -0.0105228210f, -0.166542307f, -0.00667150877f, 0.181261331f, -0.00449455017f, + -0.474292487f, -0.00428914558f, -0.185297221f, -0.00575157674f, -0.494381040f, -0.00278507406f, + -0.141748473f, -0.00289725070f, -0.487515569f, 0.000758233888f, 0.322646528f, 0.0197495818f, + 0.142943904f, 0.0276249554f, -0.563232243f, 0.0306834858f, -0.555995941f, 0.0367121249f, + 0.114935011f, 0.0496927276f, -0.152954608f, 0.0538645200f, -0.594885707f, 0.0562511310f, + 0.0678326488f, 0.0756176412f, -0.667605639f, 0.0828208700f, -0.354470938f, 0.101424232f, + 0.0228204262f, 0.120382607f, -0.639557123f, 0.124422595f, -0.690505445f, 0.126883239f, + -0.395509213f, 0.130242139f, 
-0.00618012529f, 0.139929801f, 0.175945997f, 0.140235618f, + 0.198833048f, 0.167587668f, -0.334679037f, 0.177859858f, 0.236127406f, 0.192743436f, + 0.283146858f, 0.204260647f, -0.0354267135f, 0.206209183f, 0.247388184f, 0.207016930f, + -0.0422560424f, 0.212493256f, 0.261681855f, 0.215763748f, 0.207528576f, 0.219807997f, + -0.300219178f, 0.221922547f, 0.206393883f, 0.245171010f, 0.239619836f, 0.244768366f, + -0.523026288f, 0.250639766f, -0.591975033f, 0.254252791f, 0.246785000f, 0.252878994f, + 0.272995651f, 0.255815417f, 0.00825022161f, 0.265591830f, 0.192723796f, 0.266924977f, + -0.222951472f, 0.290150762f, -0.545146644f, 0.304910392f, 0.131736591f, 0.319247276f, + 0.319435924f, 0.317917794f, 0.0687546134f, 0.321296155f, -0.255853772f, 0.327258259f, + 0.0948092714f, 0.325284332f, 0.104488030f, 0.327628911f, -0.245483562f, 0.327617317f, + 0.0647632629f, 0.363111496f, -0.382861346f, -0.287226975f, -0.354297429f, -0.278708905f, + -0.356116027f, -0.262691110f, -0.369049937f, -0.237850189f, -0.146217853f, -0.233530551f, + 0.102752604f, -0.223108903f, 0.137545392f, -0.218163848f, 0.125815898f, -0.216970086f, + -0.557826996f, -0.194665924f, -0.533946335f, -0.184958249f, 0.0976954028f, -0.173691019f, + -0.240166873f, -0.160652772f, 0.166464865f, -0.154563308f, -0.0330923162f, -0.125799045f, + -0.290044904f, -0.118914597f, 0.00350888353f, -0.108661920f, -0.0109116854f, -0.106212743f, + -0.0298740193f, -0.102953635f, -0.287203342f, -0.0997403413f, -0.269498408f, -0.0981520712f, + -0.000815737061f, -0.0938294530f, 0.274663270f, -0.0844340026f, -0.371082008f, -0.0805466920f, + -0.368196100f, -0.0743779093f, 0.00675902702f, -0.0735078678f, 0.226267770f, -0.0744194537f, + -0.241736412f, -0.0630025938f, -0.408663541f, -0.0564615242f, 0.251640886f, -0.0519632548f, + 0.249993712f, -0.0519672707f, -0.426033378f, -0.0365641154f, -0.467352122f, -0.0305716563f, + 0.251341015f, -0.0268137120f, -0.443456501f, -0.0243669953f, -0.502199471f, -0.0151771074f, + -0.178487480f, 
-0.0155749097f, 0.178145915f, -0.00528379623f, -0.492981344f, -0.00174682145f, + -0.150337398f, 0.000692513015f, -0.457302928f, 0.00352234906f, 0.190587431f, 0.00151424226f, + -0.482671946f, 0.00682042213f, -0.158589542f, 0.0150188655f, -0.182223722f, 0.0145649035f, + 0.107089065f, 0.0223725326f, 0.135399371f, 0.0275243558f, -0.552838683f, 0.0275048595f, + -0.432176501f, 0.0248741303f, -0.192510992f, 0.0281074084f, -0.553043425f, 0.0298770685f, + -0.684887648f, 0.0436144769f, 0.0850105733f, 0.0448755622f, -0.165784389f, 0.0439001285f, + 0.102653719f, 0.0457992665f, 0.114853017f, 0.0504316092f, -0.647432685f, 0.0608204119f, + 0.0828530043f, 0.0608987175f, 0.0894377902f, 0.0742467493f, 0.0702404827f, 0.0767309442f, + -0.613642335f, 0.0779517740f, -0.670592189f, 0.0849624202f, -0.395209312f, 0.0854151621f, + 0.125186160f, 0.0919951499f, -0.359707922f, 0.102121405f, -0.354259193f, 0.101300709f, + 0.0304000825f, 0.110619470f, -0.677573025f, 0.114422500f, 0.0305799693f, 0.121603437f, + -0.358950615f, 0.121660560f, -0.718753040f, 0.134569481f, 0.256451160f, 0.141883001f, + -0.0904129520f, 0.146879435f, -0.0184279438f, 0.148968369f, -0.356992692f, 0.160104826f, + -0.337676436f, 0.161766291f, 0.201174691f, 0.169025913f, -0.378423393f, 0.170933828f, + -0.601599216f, 0.174998865f, -0.0902864039f, 0.184311926f, -0.0584819093f, 0.184186250f, + 0.294467270f, 0.182560727f, 0.250262231f, 0.186239958f, -0.326370239f, 0.191697389f, + -0.0980727375f, 0.196913749f, 0.253085673f, 0.201914877f, -0.0344332159f, 0.205900863f, + 0.255287141f, 0.203029931f, -0.452713937f, 0.205191836f, 0.264822274f, 0.217408702f, + -0.0290334225f, 0.221684650f, -0.583990574f, 0.237398431f, -0.145020664f, 0.240374506f, + 0.249667659f, 0.254706532f, 0.274279058f, 0.256447285f, -0.282936275f, 0.259140193f, + 0.241211995f, 0.260401577f, -0.590560019f, 0.272659779f, -0.574947417f, 0.272671998f, + -0.224780366f, 0.279990941f, -0.525540829f, 0.287235677f, -0.247069210f, 0.298608154f, + -0.201292604f, 0.298156679f, 
0.319822490f, 0.317605704f, -0.248013541f, 0.320789784f, + 0.0957527757f, 0.326543272f, 0.105006196f, 0.328469753f, -0.264089525f, 0.332354158f, + -0.670460403f, 0.339870930f, 0.118318990f, 0.345167071f, 0.0737744719f, 0.353734553f, + 0.0655663237f, 0.361025929f, -0.306805104f, 0.363820761f, 0.0524423867f, 0.371921480f, + 0.0713953897f, 0.375074357f, -0.411387652f, -0.268335998f, -0.357590824f, -0.263346583f, + -0.407676578f, -0.253785878f, 0.0660323426f, -0.253718942f, -0.157670841f, -0.225629836f, + 0.170453921f, -0.220800355f, -0.475751191f, -0.209005311f, -0.331408232f, -0.203059763f, + -0.173841938f, -0.199112654f, -0.503261328f, -0.193795130f, -0.532277644f, -0.190292686f, + -0.0972326621f, -0.191563144f, -0.0692789108f, -0.172031537f, -0.318824291f, -0.169072524f, + -0.576232314f, -0.162124678f, -0.0839322209f, -0.156304389f, -0.583625376f, -0.142171323f, + -0.0546422042f, -0.135338858f, 0.0501612425f, -0.132490858f, -0.645011544f, -0.111341864f, + -0.0925374180f, -0.0483307689f, -0.444242209f, -0.0263337940f, 0.0335495919f, -0.0281750113f, + 0.274629444f, -0.0259516705f, 0.213774025f, -0.0240113474f, -0.194874078f, -0.0151330847f, + 0.175111562f, -0.00868577976f, -0.185011521f, -0.000680683181f, 0.152071685f, 0.0204544198f, + 0.321354061f, 0.0199794695f, -0.192160159f, 0.0275637116f, -0.189656645f, 0.0275667012f, + 0.137452200f, 0.0298070628f, -0.194602579f, 0.0449027494f, -0.647751570f, 0.0625102371f, + 0.124078721f, 0.0639316663f, 0.125849217f, 0.0762147456f, -0.614036798f, 0.0778791085f, + -0.684063017f, 0.0867959261f, -0.670344174f, 0.0846142769f, -0.127689242f, 0.0883567855f, + 0.123796627f, 0.0907361880f, -0.356352538f, 0.101948388f, -0.388843179f, 0.110183217f, + 0.0316384435f, 0.123791300f, -0.627986908f, 0.146491125f, -0.0747071728f, 0.158135459f, + -0.0235102437f, 0.168867558f, -0.0903210714f, 0.184088305f, 0.292073458f, 0.183571488f, + -0.0585953295f, 0.184784085f, -0.0317775607f, 0.218368888f, 0.209752038f, 0.223883361f, + -0.295424402f, 
0.229150623f, -0.144439027f, 0.237902716f, -0.284140587f, 0.262761474f, + 0.289083928f, 0.276900887f, 0.159017235f, 0.300793648f, -0.204925507f, 0.298536539f, + -0.544958472f, 0.305164427f, -0.261615157f, 0.306550682f, 0.0977220088f, 0.327949613f, + 0.109876208f, 0.337665111f, -0.283918083f, 0.347385526f, 0.0436712503f, 0.350702018f, + 0.114512287f, 0.367949426f, 0.106543839f, 0.375095814f, 0.505324781f, -0.272183985f, + 0.0645913780f, -0.251512915f, -0.457196057f, -0.225893468f, -0.480293810f, -0.222602293f, + -0.138176888f, -0.209798917f, -0.110901751f, -0.198036820f, -0.196451947f, -0.191723794f, + -0.537742376f, -0.174413025f, -0.0650562346f, -0.174762890f, -0.567489207f, -0.165461496f, + 0.0879585966f, -0.163023785f, -0.303777844f, -0.142031133f, 0.199195996f, -0.141861767f, + 0.0491657220f, -0.132264882f, -0.497363061f, -0.107934952f, -0.000536393432f, -0.102828167f, + 0.0155952247f, -0.0998895392f, -0.363601953f, -0.0897399634f, -0.224325985f, -0.0719678402f, + -0.0638299435f, -0.0646244809f, -0.108656809f, -0.0468749776f, -0.0865045264f, -0.0512534790f, + -0.469339728f, -0.0279338267f, 0.0578282699f, -0.0133374622f, -0.195265710f, -0.0115369316f, + 0.296735317f, -0.0132813146f, 0.0664219409f, 0.0134935537f, 0.126060545f, 0.0333039127f, + 0.139887005f, 0.0334976614f, -0.547339618f, 0.0433730707f, 0.0866046399f, 0.0527233221f, + 0.131943896f, 0.0657638907f, -0.280056775f, 0.0685855150f, 0.0746403933f, 0.0795079395f, + 0.125382811f, 0.0822770745f, -0.648187757f, 0.103887804f, -0.107411072f, 0.107508548f, + 0.0155869983f, 0.108978622f, 0.0189307462f, 0.129617691f, 0.162685350f, 0.127225950f, + -0.0875291452f, 0.142281070f, 0.319728941f, 0.148827255f, -0.0259547811f, 0.169724479f, + 0.259297132f, 0.190075457f, -0.467013776f, 0.212794706f, -0.315732479f, 0.219243437f, + -0.111042649f, 0.217940107f, 0.239550352f, 0.222786069f, 0.263966352f, 0.260309041f, + 0.320023954f, -0.222228840f, -0.322707742f, -0.213004455f, -0.224977970f, -0.169595599f, + -0.605799317f, 
-0.142425537f, 0.0454332717f, -0.129945949f, 0.205748767f, -0.113405459f, + 0.317985803f, -0.118630089f, 0.497755647f, -0.0962266177f, -0.393495560f, -0.0904672816f, + 0.240035087f, -0.0737613589f, -0.212947786f, -0.0280145984f, 0.0674179196f, 0.0124880793f, + -0.545862198f, 0.0207057912f, -0.284409463f, 0.0626631007f, -0.107082598f, 0.0854173824f, + 0.0578137375f, 0.0917839557f, 0.145844117f, 0.102937251f, 0.183878779f, 0.119614877f, + -0.626380265f, 0.140862882f, -0.0325521491f, 0.161834121f, -0.590211987f, 0.167720392f, + 0.289599866f, 0.186565816f, -0.328821093f, 0.187714070f, -0.289086968f, 0.205165654f, + -0.445392698f, 0.215343162f, 0.173715711f, 0.273563296f, 0.284015119f, 0.270610362f, + 0.0174398609f, 0.283809274f, -0.496335506f, -0.202981815f, 0.0389454551f, -0.166210428f, + -0.317301393f, -0.156280205f, -0.396320462f, -0.0949599668f, -0.213638976f, -0.0776446015f, + 0.497601509f, -0.0928353444f, -0.260220319f, -0.0718628615f, -0.116495222f, -0.0543703064f, + -0.118132629f, -0.0156126227f, 0.0242815297f, 0.00629332382f, -0.537928998f, 0.00815516617f, + 0.317720622f, 0.0271231923f, -0.582170665f, 0.0478387438f, -0.536856830f, 0.0466793887f, + -0.220819592f, 0.0433096550f, -0.246473342f, 0.0572598167f, 0.481240988f, 0.0503845438f, + -0.102453016f, 0.0649363101f, -0.149955124f, 0.0744054317f, -0.248215869f, 0.0916868672f, + -0.101221249f, 0.110788561f, -0.437672526f, 0.179065496f, -0.0383506976f, 0.183546484f, + -0.279600590f, 0.208760634f, 0.182261929f, 0.275244594f, 0.0253023170f, -0.170456246f, + -0.476852804f, -0.123630777f, -0.0803126246f, -0.0782076195f, -0.133338496f, -0.0659459904f, + -0.0822777376f, -0.00390591589f, 0.149250969f, 0.104314201f, 0.0418044887f, 0.149009049f, + -0.438308835f, 0.164682120f + }; + + const Point2f* currRectifiedPointArr_2f = (const Point2f*)currRectifiedPointArr; + vector currRectifiedPoints(currRectifiedPointArr_2f, + currRectifiedPointArr_2f + sizeof(currRectifiedPointArr) / sizeof(currRectifiedPointArr[0]) / 2); + + 
_currRectifiedPoints.swap(currRectifiedPoints); + + static const float prevRectifiedPointArr[] = { + -0.599324584f, -0.381164283f, -0.387985110f, -0.385367423f, -0.371437579f, -0.371891201f, + -0.340867460f, -0.370632380f, -0.289822906f, -0.364118159f, -0.372411519f, -0.335272551f, + -0.289586753f, -0.335766882f, -0.372335523f, -0.316857219f, -0.321099430f, -0.323233813f, + 0.208661616f, -0.153931335f, -0.559897065f, 0.193362445f, 0.0181128159f, -0.325224668f, + -0.427504510f, 0.105302416f, 0.487470537f, -0.187071189f, 0.343267351f, -0.339755565f, + -0.477639943f, -0.204375938f, -0.466626763f, -0.204072326f, 0.340813518f, -0.347292691f, + 0.342682719f, -0.320172101f, 0.383663863f, -0.327343374f, -0.467062414f, -0.193995550f, + -0.475603998f, -0.189820126f, 0.552475691f, 0.198386014f, -0.508027375f, -0.174297482f, + -0.211989403f, -0.217261642f, 0.180832058f, -0.127527758f, -0.112721168f, -0.125876635f, + -0.112387165f, -0.167135969f, -0.562491000f, -0.140186235f, 0.395156831f, -0.298828602f, + -0.485202312f, -0.135626689f, 0.148358017f, -0.195937276f, -0.248159677f, -0.254669130f, + -0.568366945f, -0.105187029f, -0.0714842379f, -0.0832463056f, -0.497599572f, -0.205334768f, + -0.0948727652f, 0.245045587f, 0.160857186f, 0.138075173f, 0.164952606f, -0.195109487f, + 0.165254518f, -0.186554477f, -0.183777973f, -0.124357253f, 0.166813776f, -0.153241888f, + -0.241765827f, -0.0820638761f, 0.208661616f, -0.153931335f, 0.540147483f, -0.203156039f, + 0.529201686f, -0.199348077f, -0.248159677f, -0.254669130f, 0.180369601f, -0.139303327f, + 0.570952237f, -0.185722873f, 0.221771300f, -0.143187970f, 0.498627752f, -0.183768719f, + 0.561214447f, -0.188666284f, -0.241409421f, -0.253560483f, 0.569648385f, -0.184499770f, + 0.276665628f, -0.0881819800f, 0.533934176f, -0.142226711f, -0.299728751f, -0.330407321f, + 0.270322412f, -0.256552309f, -0.255016476f, -0.0823200271f, -0.378096581f, 0.0264666155f, + -0.331565350f, 0.0210608803f, 0.0100810500f, -0.0213523544f, -0.248159677f, 
-0.254669130f, + 0.249623299f, 0.164078355f, 0.0190342199f, -0.00415771967f, 0.604407132f, -0.259350061f, + 0.0660026148f, -0.00787150953f, 0.605921566f, 0.114344336f, 0.0208173525f, 0.00527517078f, + -0.0200567022f, 0.0183092188f, -0.184784368f, -0.193566754f, -0.0125719802f, -0.344967902f, + 0.343063682f, -0.0121044181f, 0.389022052f, -0.0171062462f, 0.163190305f, 0.200014487f, + 0.362440646f, 0.0120019922f, -0.427743971f, 0.100272447f, -0.0714842379f, -0.0832463056f, + 0.0664352402f, 0.0467514023f, -0.559897065f, 0.193362445f, -0.549086213f, 0.193808615f, + -0.241472989f, -0.253163874f, -0.241765827f, -0.0820638761f, -0.122216024f, 0.132651567f, + -0.122216024f, 0.132651567f, 0.515065968f, 0.205271944f, 0.180832058f, -0.127527758f, + -0.123633556f, 0.154476687f, -0.248159677f, -0.254669130f, 0.0208173525f, 0.00527517078f, + -0.483276874f, 0.191274792f, -0.167928949f, 0.200682297f, 0.232745290f, -0.211950779f, + -0.288701504f, -0.334238827f, -0.119621970f, 0.204155236f, -0.119621970f, 0.204155236f, + 0.632996142f, 0.0804972649f, 0.189231426f, 0.164325386f, 0.249623299f, 0.164078355f, + 0.0676716864f, 0.0479496233f, 0.207636267f, 0.184271768f, -0.300510556f, 0.358790994f, + -0.107678331f, 0.188473806f, 0.565983415f, 0.144723341f, 0.191329703f, 0.213909492f, + -0.0283227600f, -0.373237878f, -0.184958130f, 0.200373843f, 0.0346363746f, -0.0259889495f, + -0.112387165f, -0.167135969f, 0.251426309f, 0.210430339f, -0.477397382f, -0.131372169f, + -0.0667442903f, 0.0997460634f, 0.251426309f, 0.210430339f, -0.317926824f, 0.375238001f, + -0.0621999837f, 0.280056626f, 0.0443522707f, 0.321513236f, 0.471269101f, 0.260774940f, + -0.107678331f, 0.188473806f, 0.0208210852f, 0.350526422f, 0.0157474391f, 0.367335707f, + 0.632996142f, 0.0804972649f, 0.646697879f, 0.265504390f, 0.0295150280f, 0.371205181f, + 0.376071006f, 0.313471258f, -0.379525930f, 0.364357829f, -0.00628023129f, -0.0373278372f, + 0.0291138459f, 0.381194293f, 0.0358079821f, 0.381886899f, 0.0344478637f, 0.386993408f, 
+ 0.433862329f, 0.328515977f, 0.359724253f, 0.345606029f, 0.0651357397f, 0.397334814f, + 0.388413996f, 0.344747871f, -0.140228778f, 0.216103494f, 0.389989913f, 0.372472703f, + 0.444995403f, 0.300240308f, -0.606455386f, 0.100793049f, -0.362332910f, -0.371920794f, + -0.478956074f, 0.234040022f, -0.289441198f, -0.344822973f, -0.0714842379f, -0.0832463056f, + 0.375879139f, -0.374975592f, 0.376526117f, -0.326493502f, 0.313251913f, -0.306372881f, + -0.0577337518f, 0.0893306211f, -0.483683407f, -0.179540694f, -0.0763650239f, -0.258294433f, + 0.276665628f, -0.0881819800f, -0.167122558f, -0.175508693f, -0.164081737f, 0.176902041f, + 0.276665628f, -0.0881819800f, 0.602967978f, -0.260941893f, 0.158573851f, -0.178748295f, + 0.159815103f, -0.160761341f, 0.194283918f, -0.165657878f, 0.231515527f, -0.172808051f, + -0.247000366f, 0.277822912f, 0.538969517f, -0.204621449f, 0.531404376f, -0.198565826f, + -0.388338953f, -0.0433262810f, 0.499413073f, -0.181929186f, -0.237337112f, 0.0934364349f, + -0.368045300f, -0.0204487685f, -0.374767631f, -0.00678646797f, -0.0667242110f, -0.248651102f, + -0.248159677f, -0.254669130f, -0.345217139f, -0.00101677026f, -0.353382975f, 0.0210586078f, + -0.322639942f, 0.0211628731f, 0.0184581745f, -0.0366852731f, 0.0259528626f, -0.0136881955f, + -0.339446336f, 0.0286702402f, 0.0335014127f, -0.0271516014f, 0.465966076f, 0.0830826238f, + -0.337860256f, 0.0362124667f, 0.188271523f, -0.146541893f, -0.298272073f, -0.323130161f, + 0.0643569306f, -0.0264105909f, -0.353804410f, 0.0433940105f, 0.618646920f, -0.0855877250f, + 0.411329508f, -0.0414552018f, -0.427743971f, 0.100272447f, -0.247000366f, 0.277822912f, + 0.381912649f, -0.00914942939f, 0.0664352402f, 0.0467514023f, 0.138687640f, -0.114854909f, + -0.0170480162f, -0.372787565f, -0.535477102f, 0.183755845f, -0.155668780f, 0.144164801f, + -0.427504510f, 0.105302416f, -0.484430760f, 0.227277100f, -0.361284673f, -0.373513311f, + -0.316764563f, 0.331503242f, -0.0230990555f, 0.314180285f, 0.101539977f, 
-0.256640851f, + -0.210743994f, -0.111771651f, -0.560086846f, 0.151153624f, 0.542884171f, 0.141691014f, + 0.596041858f, 0.144990161f, 0.239398748f, 0.207432285f, 0.557545543f, 0.155783832f, + 0.233033463f, 0.214694947f, 0.572789013f, 0.162068501f, 0.512761712f, 0.176260322f, + 0.287076950f, 0.0868823677f, 0.515065968f, 0.205271944f, 0.552475691f, 0.198386014f, + -0.301232725f, 0.347804308f, -0.379525930f, 0.364357829f, 0.561403453f, 0.206571117f, + 0.590792358f, 0.206283644f, -0.428855836f, 0.100270294f, 0.300039053f, -0.283949375f, + 0.0481642894f, 0.334260821f, -0.173260480f, -0.167126089f, 0.444995403f, 0.300240308f, + 0.646697879f, 0.265504390f, 0.375487208f, 0.314186513f, 0.0217850581f, 0.381838262f, + 0.404422343f, 0.313856274f, 0.417644382f, 0.314869910f, 0.0358079821f, 0.381886899f, + 0.378262609f, 0.358303785f, -0.336999178f, -0.367679387f, -0.295442462f, -0.365161836f, + -0.293496192f, -0.342732310f, -0.298767596f, -0.303165644f, -0.0111337993f, -0.342149645f, + 0.310648471f, -0.374146342f, 0.359467417f, -0.373746723f, 0.340779394f, -0.369219989f, + -0.527450860f, -0.203896046f, -0.490746915f, -0.194764644f, 0.314866364f, -0.300261766f, + -0.0298556220f, 0.0591949411f, 0.319549739f, 0.0552458987f, 0.163977623f, -0.209844783f, + -0.149107113f, -0.149005055f, 0.212483421f, -0.191198543f, 0.197611198f, -0.187811792f, + 0.174361721f, -0.179897651f, 0.0387913659f, -0.0366905928f, -0.122265801f, -0.126270071f, + 0.211038783f, -0.172842503f, 0.246728286f, 0.134398326f, -0.0577337518f, 0.0893306211f, + -0.415295422f, 0.105914228f, -0.292730510f, 0.0379575789f, 0.489636958f, -0.194117576f, + -0.254337519f, 0.0937413648f, 0.336177140f, 0.305443168f, 0.526942134f, -0.164069965f, + 0.524966419f, -0.165161178f, -0.379173398f, 0.332068861f, -0.340792000f, 0.00105464540f, + 0.525632977f, -0.134992197f, -0.308774501f, 0.00290521770f, -0.375407755f, 0.0294080544f, + 0.0178439785f, -0.0365749858f, -0.255016476f, -0.0823200271f, -0.359951973f, 0.0446678996f, + 
0.0564084686f, -0.0197724514f, -0.315141559f, 0.0424463004f, 0.292196661f, 0.279810339f, + -0.345294952f, 0.0533128195f, 0.0458479226f, -0.00109126628f, 0.0179449394f, 0.00371767790f, + 0.365872562f, -0.0412087664f, 0.403013051f, -0.0416624695f, -0.0714842379f, -0.0832463056f, + -0.209011748f, 0.133690849f, 0.0122421598f, 0.0230175443f, -0.0577337518f, 0.0893306211f, + -0.572846889f, 0.141102776f, 0.345340014f, -0.0111671211f, 0.0479373708f, 0.0379454680f, + 0.363291621f, -0.00829032529f, 0.381912649f, -0.00914942939f, -0.521542430f, 0.151489466f, + 0.345966965f, 0.0110620018f, 0.354562849f, 0.0254590791f, 0.334322065f, 0.0310698878f, + -0.00463629747f, -0.0357710384f, -0.538667142f, 0.185365483f, -0.209011748f, 0.133690849f, + 0.398122877f, 0.0403857268f, -0.160881191f, 0.145009249f, -0.155668780f, 0.144164801f, + -0.0714842379f, -0.0832463056f, -0.536377013f, 0.221241340f, -0.0632879063f, -0.247039422f, + -0.155869946f, 0.169341147f, 0.578685045f, -0.223878756f, 0.557447612f, 0.0768704116f, + -0.188812047f, 0.228197843f, 0.246747240f, 0.136472240f, -0.142677084f, 0.213736445f, + -0.118143238f, 0.208306640f, -0.388338953f, -0.0433262810f, -0.163515776f, 0.231573820f, + -0.0738375857f, -0.256104171f, 0.173092276f, 0.191535592f, 0.208548918f, 0.185476139f, + -0.392410189f, 0.0686017647f, 0.555366814f, 0.130478472f, -0.101943128f, -0.113997340f, + 0.0716935173f, 0.340265751f, 0.561738014f, 0.148283109f, 0.242452115f, 0.205116034f, + 0.561738014f, 0.148283109f, -0.427743971f, 0.100272447f, 0.578137994f, 0.163653031f, + 0.251277626f, 0.223055005f, -0.376505047f, 0.343530416f, -0.0714842379f, -0.0832463056f, + 0.567448437f, 0.207419440f, 0.590792358f, 0.206283644f, 0.578685045f, -0.223878756f, + 0.0635343120f, -0.00499309227f, -0.370767444f, 0.384881169f, -0.485191971f, -0.120962359f, + 0.512761712f, 0.176260322f, -0.375972956f, 0.0288736783f, -0.147176415f, -0.185790271f, + 0.0752977654f, 0.339190871f, 0.646697879f, 0.265504390f, 0.0282997675f, 0.373214334f, + 
0.410353780f, 0.316089481f, 0.417644382f, 0.314869910f, 0.0147482762f, 0.389459789f, + -0.182916895f, -0.140514761f, 0.433515042f, 0.330774426f, 0.388069838f, 0.347381502f, + 0.378925055f, 0.357438952f, 0.247128293f, -0.116897359f, -0.0230906308f, 0.314556211f, + 0.388534039f, 0.370789021f, -0.368050814f, -0.339653373f, -0.292694926f, -0.341653705f, + -0.353774697f, -0.320387989f, 0.599263310f, -0.264537901f, -0.0213720929f, -0.326088905f, + -0.571947694f, 0.141147330f, -0.0577337518f, 0.0893306211f, 0.108424753f, -0.267108470f, + -0.0317604132f, -0.0458168685f, -0.0967136100f, 0.242639020f, -0.486509413f, -0.204596937f, + 0.239178345f, -0.219647482f, 0.108424753f, -0.267108470f, -0.280393064f, -0.283867925f, + -0.533659995f, -0.151733354f, 0.0880429000f, -0.240412414f, -0.534965396f, -0.124174178f, + 0.142445788f, -0.118948005f, 0.0947291106f, 0.0767719224f, -0.597055852f, -0.0692315027f, + -0.254337519f, 0.0937413648f, -0.308869720f, 0.00354974205f, -0.409894019f, -0.0694356859f, + 0.556049764f, -0.137727231f, -0.0317604132f, -0.0458168685f, -0.524152219f, 0.239541322f, + 0.108424753f, -0.267108470f, 0.0143662402f, -0.0164190196f, 0.150936082f, 0.128616557f, + 0.618646920f, -0.0855877250f, 0.0122421598f, 0.0230175443f, 0.0122421598f, 0.0230175443f, + -0.188812047f, 0.228197843f, 0.00441747159f, -0.297387213f, -0.520719767f, 0.152393058f, + 0.392849416f, 0.00738697406f, 0.400074363f, 0.0185570847f, -0.161484867f, -0.192373112f, + -0.554901838f, 0.190730989f, -0.538667142f, 0.185365483f, -0.0667442903f, 0.0997460634f, + 0.399885803f, 0.0410231315f, -0.159816831f, 0.145826310f, -0.193316415f, 0.161277503f, + -0.0678345188f, 0.287081748f, -0.383089483f, -0.283330113f, -0.538667142f, 0.185365483f, + 0.245664895f, 0.162005231f, 0.173092276f, 0.191535592f, 0.601281762f, 0.120500855f, + 0.208548918f, 0.185476139f, 0.246893004f, 0.220670119f, 0.516039073f, 0.178782418f, + -0.254337519f, 0.0937413648f, -0.254337519f, 0.0937413648f, -0.0230990555f, 0.314180285f, + 
0.610029638f, 0.227215171f, -0.254337519f, 0.0937413648f, 0.0697976872f, 0.343245506f, + 0.538969517f, -0.204621449f, 0.00916308723f, 0.359826297f, 0.410353780f, 0.316089481f, + 0.423950195f, 0.324112266f, 0.166566655f, 0.145402640f, 0.354594171f, 0.350193948f, + 0.433712035f, 0.356235564f, 0.425307065f, 0.364637494f, 0.166924104f, -0.152513608f, + 0.594130874f, -0.268246830f, -0.0843627378f, -0.0962528363f, 0.108424753f, -0.267108470f, + 0.00760878995f, -0.304247797f, -0.471018314f, -0.178305879f, -0.0817007348f, -0.0933016762f, + 0.232274890f, 0.154553935f, 0.108424753f, -0.267108470f, -0.525787771f, -0.161353886f, + -0.206048280f, 0.241006181f, -0.178062543f, -0.184703678f, 0.105906568f, 0.268231422f, + -0.0817007348f, -0.0933016762f, 0.490914792f, 0.276718110f, -0.176861435f, -0.153617889f, + 0.0387795344f, 0.0457828715f, 0.456206828f, -0.250739783f, 0.0982551053f, 0.104225174f, + 0.142445788f, -0.118948005f, 0.108424753f, -0.267108470f, -0.0817007348f, -0.0933016762f, + -0.340707630f, 0.00498990202f, 0.0947291106f, 0.0767719224f, 0.169802040f, 0.203134149f, + 0.577375948f, -0.125099033f, 0.318376005f, -0.0486588739f, 0.388697982f, -0.0351444185f, + 0.406605273f, -0.0364143848f, 0.274859309f, 0.0776181892f, 0.349759877f, -7.70174083e-05f, + 0.402967423f, 0.00697830878f, 0.105906568f, 0.268231422f, 0.338973522f, 0.0359939188f, + 0.394951165f, 0.0322254188f, -0.503028810f, 0.203627899f, -0.0840740278f, -0.234684706f, + 0.108424753f, -0.267108470f, 0.286642373f, 0.103878126f, -0.0817007348f, -0.0933016762f, + 0.332983583f, -0.0356097035f, 0.628004134f, 0.0766527727f, -0.112659439f, -0.196833044f, + 0.568797410f, 0.136423931f, 0.456206828f, -0.250739783f, -0.254337519f, 0.0937413648f, + -0.206692874f, -0.210832119f, 0.550912619f, 0.171586066f, 0.581267595f, 0.213235661f, + 0.334484309f, 0.303876013f, -0.469516128f, 0.0883551016f, 0.133899942f, 0.106862970f, + -0.560961962f, -0.114681393f, -0.0840740278f, -0.234684706f, 0.459386230f, -0.236088052f, + 0.594130874f, 
-0.268246830f, -0.124856450f, 0.193096936f, -0.469516128f, 0.0883551016f, + 0.514290810f, -0.193822652f, 0.158255994f, 0.233290926f, 0.317973822f, -0.0477817170f, + -0.0817007348f, -0.0933016762f, -0.0702776462f, -0.0671426803f, 0.440836668f, -0.100193374f, + 0.326240778f, 0.0523138903f, -0.279556662f, -0.283929169f, -0.485202312f, -0.135626689f, + -0.467358112f, 0.246376559f, 0.232274890f, 0.154553935f, 0.258349210f, -0.269529581f, + 0.600620329f, 0.126268178f, -0.0985416993f, 0.245674044f, -0.279264033f, -0.0990248993f, + 0.108424753f, -0.267108470f, 0.259638488f, -0.100053802f, 0.605106652f, 0.223564968f, + 0.129683495f, -0.100376993f, -0.0953388065f, 0.112722203f, -0.440420747f, -0.0396305211f, + -0.0181254297f, 0.0439292751f, -0.0878356919f, 0.0847257674f, -0.271582603f, 0.126064256f, + -0.183777973f, -0.124357253f, 0.431088895f, 0.0680654719f, -0.469516128f, 0.0883551016f, + -0.445174575f, 0.133306518f, -0.0878356919f, 0.0847257674f, -0.279039949f, 0.0810008645f, + 0.612402737f, -0.0826834291f, -0.454494953f, 0.122878648f, 0.244000912f, -0.264438629f, + 0.142445788f, -0.118948005f, 0.129683495f, -0.100376993f, -0.210078895f, 0.131698489f, + -0.277847171f, 0.0665081516f, 0.431088895f, 0.0680654719f, 0.252345473f, 0.0688349009f, + 0.133899942f, 0.106862970f, 0.133899942f, 0.106862970f, -0.486509413f, -0.204596937f, + -0.0940247625f, 0.0698821172f, 0.133899942f, 0.106862970f, -0.440420747f, -0.0396305211f, + -0.0878356919f, 0.0847257674f, -0.0954068601f, -0.0968973264f, -0.277847171f, 0.0665081516f, + -0.277847171f, 0.0665081516f, 0.266677618f, 0.111257851f, 0.292424291f, -0.230888903f, + -0.0954068601f, -0.0968973264f + }; + + const Point2f* prevRectifiedPointArr_2f = (const Point2f*)prevRectifiedPointArr; + vector prevRectifiedPoints(prevRectifiedPointArr_2f, prevRectifiedPointArr_2f + + sizeof(prevRectifiedPointArr) / sizeof(prevRectifiedPointArr[0]) / 2); + + _prevRectifiedPoints.swap(prevRectifiedPoints); + + int validSolutionArr[2] = { 0, 2 }; + + vector 
validSolutions(validSolutionArr, validSolutionArr + + sizeof(validSolutionArr) / sizeof(validSolutionArr[0])); + + _validSolutions.swap(validSolutions); + + vector rotations; + vector normals; + + for (size_t i = 0; i < (sizeof(rotationsArray) / sizeof(*rotationsArray)); i++) { + Mat tempRotMat = Mat(Matx33d( + rotationsArray[i][0], + rotationsArray[i][1], + rotationsArray[i][2], + rotationsArray[i][3], + rotationsArray[i][4], + rotationsArray[i][5], + rotationsArray[i][6], + rotationsArray[i][7], + rotationsArray[i][8] + )); + + Mat tempNormMat = Mat(Matx31d( + normalsArray[i][0], + normalsArray[i][1], + normalsArray[i][2] + )); + + rotations.push_back(tempRotMat); + normals.push_back(tempNormMat); + } + + _rotations.swap(rotations); + _normals.swap(normals); + + _mask = Mat(514, 1, CV_8U, maskArray).clone(); + } + + bool isValidResult(const vector& solutions) + { + return (solutions == _validSolutions); + } + + vector _validSolutions; + vector _prevRectifiedPoints, _currRectifiedPoints; + Mat _mask; + vector _rotations, _normals; +}; + +TEST(Calib3d_FilterDecomposeHomography, regression) { CV_FilterHomographyDecompTest test; test.safe_run(); } + +}} diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 9549439fd4..e306dde188 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -12,7 +12,8 @@ ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX) ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js) -ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL) +ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE) + if(OPENCV_DNN_OPENCL AND HAVE_OPENCL) add_definitions(-DCV_OCL4DNN=1) else() diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 6a7c9d5a6a..0177b31a47 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1446,7 +1446,7 @@ struct Net::Impl // TODO: OpenCL target support more fusion styles. 
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) && (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && - ld.layerInstance->type != "MVN")) ) + ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling")) ) continue; Ptr& currLayer = ld.layerInstance; @@ -1993,11 +1993,17 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin) backendNode->net = Ptr(new InfEngineBackendNet(ieNet)); for (auto& it : ieNet.getOutputsInfo()) { + Ptr cvLayer(new InfEngineBackendLayer(it.second)); + InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str()); + CV_Assert(ieLayer); + LayerParams lp; int lid = cvNet.addLayer(it.first, "", lp); LayerData& ld = cvNet.impl->layers[lid]; - ld.layerInstance = Ptr(new InfEngineBackendLayer(it.second)); + cvLayer->name = it.first; + cvLayer->type = ieLayer->type; + ld.layerInstance = cvLayer; ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode; for (int i = 0; i < inputsNames.size(); ++i) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index eab1dcaa8a..775a044b44 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -165,6 +165,7 @@ public: (type == AVE ? 
LIBDNN_POOLING_METHOD_AVE : LIBDNN_POOLING_METHOD_STO); config.avePoolPaddedArea = avePoolPaddedArea; + config.computeMaxIdx = computeMaxIdx; config.use_half = use_half; poolOp = Ptr >(new OCL4DNNPool(config)); } diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp index f3a26a3e6d..e0ca5ca98c 100644 --- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp +++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp @@ -352,6 +352,7 @@ struct OCL4DNNPoolConfig pool_method(LIBDNN_POOLING_METHOD_MAX), global_pooling(false), avePoolPaddedArea(true), + computeMaxIdx(true), use_half(false) {} MatShape in_shape; @@ -365,6 +366,7 @@ struct OCL4DNNPoolConfig ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX; bool global_pooling; // = false; bool avePoolPaddedArea; + bool computeMaxIdx; bool use_half; }; @@ -399,6 +401,7 @@ class OCL4DNNPool int32_t pooled_height_; int32_t pooled_width_; bool avePoolPaddedArea; + bool computeMaxIdx; bool use_half; }; diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp index 81238e9f3e..b74bf4d8e8 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp @@ -56,6 +56,7 @@ OCL4DNNPool::OCL4DNNPool(OCL4DNNPoolConfig config) channels_ = config.channels; pool_method_ = config.pool_method; avePoolPaddedArea = config.avePoolPaddedArea; + computeMaxIdx = config.computeMaxIdx; use_half = config.use_half; for (int i = 0; i < spatial_dims; ++i) @@ -97,7 +98,7 @@ bool OCL4DNNPool::Forward(const UMat& bottom, UMat& top_mask) { bool ret = true; - size_t global[] = { 128 * 128 }; + size_t global[] = { (size_t)count_ }; size_t local[] = { 128 }; // support 2D case @@ -105,8 +106,7 @@ bool OCL4DNNPool::Forward(const UMat& bottom, { case LIBDNN_POOLING_METHOD_MAX: { - bool haveMask = !top_mask.empty(); - String kname = haveMask ? "max_pool_forward_mask" : "max_pool_forward"; + String kname = computeMaxIdx ? 
"max_pool_forward_mask" : "max_pool_forward"; kname += (use_half) ? "_half" : "_float"; ocl::Kernel oclk_max_pool_forward( kname.c_str(), @@ -118,7 +118,7 @@ bool OCL4DNNPool::Forward(const UMat& bottom, kernel_w_, kernel_h_, stride_w_, stride_h_, pad_w_, pad_h_, - haveMask ? " -D HAVE_MASK=1" : "" + computeMaxIdx ? " -D HAVE_MASK=1" : "" )); if (oclk_max_pool_forward.empty()) diff --git a/modules/dnn/src/opencl/ocl4dnn_pooling.cl b/modules/dnn/src/opencl/ocl4dnn_pooling.cl index e9d1d26f0f..501f5a5e87 100644 --- a/modules/dnn/src/opencl/ocl4dnn_pooling.cl +++ b/modules/dnn/src/opencl/ocl4dnn_pooling.cl @@ -65,36 +65,40 @@ __kernel void #endif ) { - for (int index = get_global_id(0); index < nthreads; - index += get_global_size(0)) + int index = get_global_id(0); + if (index >= nthreads) + return; + + const int pw = index % pooled_width; + const int xx = index / pooled_width; + const int ph = xx % pooled_height; + const int ch = xx / pooled_height; + int hstart = ph * STRIDE_H - PAD_H; + int wstart = pw * STRIDE_W - PAD_W; + Dtype maxval = -FLT_MAX; + int maxidx = -1; + int in_offset = ch * height * width; + for (int h = 0; h < KERNEL_H; ++h) { - const int pw = index % pooled_width; - const int ph = (index / pooled_width) % pooled_height; - const int c = (index / pooled_width / pooled_height) % channels; - const int n = index / pooled_width / pooled_height / channels; - int hstart = ph * STRIDE_H - PAD_H; - int wstart = pw * STRIDE_W - PAD_W; - const int hend = min(hstart + KERNEL_H, height); - const int wend = min(wstart + KERNEL_W, width); - hstart = max(hstart, (int)0); - wstart = max(wstart, (int)0); - Dtype maxval = -FLT_MAX; - int maxidx = -1; - __global const Dtype* bottom_slice = bottom_data - + (n * channels + c) * height * width; - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - if (bottom_slice[h * width + w] > maxval) { - maxidx = h * width + w; - maxval = bottom_slice[maxidx]; + int off_y = hstart + h; + if (off_y >= 0 
&& off_y < height) + { + for (int w = 0; w < KERNEL_W; ++w) + { + int off_x = wstart + w; + if (off_x >= 0 && off_x < width) + { + Dtype val = bottom_data[in_offset + off_y * width + off_x]; + maxidx = (val > maxval) ? (off_y * width + off_x) : maxidx; + maxval = fmax(val, maxval); } } } - top_data[index] = maxval; + } + top_data[index] = maxval; #ifdef HAVE_MASK - mask[index] = maxidx; + mask[index] = maxidx; #endif - } } #elif defined KERNEL_AVE_POOL @@ -105,43 +109,42 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)( const int pooled_height, const int pooled_width, __global Dtype* top_data) { - for (int index = get_global_id(0); index < nthreads; - index += get_global_size(0)) - { - { - const int pw = index % pooled_width; - const int ph = (index / pooled_width) % pooled_height; - const int c = (index / pooled_width / pooled_height) % channels; - const int n = index / pooled_width / pooled_height / channels; - int hstart = ph * STRIDE_H - PAD_H; - int wstart = pw * STRIDE_W - PAD_W; - int hend = min(hstart + KERNEL_H, height + PAD_H); - int wend = min(wstart + KERNEL_W, width + PAD_W); - int pool_size; + int index = get_global_id(0); + if (index >= nthreads) + return; + + const int pw = index % pooled_width; + const int xx = index / pooled_width; + const int ph = xx % pooled_height; + const int ch = xx / pooled_height; + int hstart = ph * STRIDE_H - PAD_H; + int wstart = pw * STRIDE_W - PAD_W; + int hend = min(hstart + KERNEL_H, height + PAD_H); + int wend = min(wstart + KERNEL_W, width + PAD_W); + int pool_size; #ifdef AVE_POOL_PADDING_AREA - pool_size = (hend - hstart) * (wend - wstart); - hstart = max(hstart, (int)0); - wstart = max(wstart, (int)0); - hend = min(hend, height); - wend = min(wend, width); + pool_size = (hend - hstart) * (wend - wstart); + hstart = max(hstart, (int)0); + wstart = max(wstart, (int)0); + hend = min(hend, height); + wend = min(wend, width); #else - hstart = max(hstart, (int)0); - wstart = max(wstart, (int)0); - hend = min(hend, 
height); - wend = min(wend, width); - pool_size = (hend - hstart) * (wend - wstart); + hstart = max(hstart, (int)0); + wstart = max(wstart, (int)0); + hend = min(hend, height); + wend = min(wend, width); + pool_size = (hend - hstart) * (wend - wstart); #endif - Dtype aveval = 0; - __global const Dtype* bottom_slice = bottom_data - + (n * channels + c) * height * width; - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - aveval += bottom_slice[h * width + w]; - } - } - top_data[index] = aveval / pool_size; + Dtype aveval = 0; + int in_offset = ch * height * width; + for (int h = hstart; h < hend; ++h) + { + for (int w = wstart; w < wend; ++w) + { + aveval += bottom_data[in_offset + h * width + w]; } } + top_data[index] = aveval / pool_size; } #elif defined KERNEL_STO_POOL diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 9140368522..1faa7fba4d 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -18,6 +18,7 @@ Implementation of Tensorflow models parser #include #include #include +#include #include "tf_graph_simplifier.hpp" #endif @@ -50,7 +51,8 @@ enum DataLayout { DATA_LAYOUT_NHWC, DATA_LAYOUT_NCHW, - DATA_LAYOUT_UNKNOWN + DATA_LAYOUT_UNKNOWN, + DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d) }; typedef std::vector > StrIntVector; @@ -245,16 +247,41 @@ const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, cons return layer.attr().at(name); } +static int getDataLayout(const tensorflow::NodeDef& layer) +{ + if (hasLayerAttr(layer, "data_format")) + { + std::string format = getLayerAttr(layer, "data_format").s(); + if (format == "NHWC" || format == "channels_last") + return DATA_LAYOUT_NHWC; + else if (format == "NCHW" || format == "channels_first") + return DATA_LAYOUT_NCHW; + else + CV_Error(Error::StsParseError, "Unknown data_format value: " + format); + } + return 
DATA_LAYOUT_UNKNOWN; +} + void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) { if (hasLayerAttr(layer, "strides")) { const tensorflow::AttrValue& val = getLayerAttr(layer, "strides"); + int dimX, dimY, dimC; + int layout = getDataLayout(layer); + if (layout == DATA_LAYOUT_NCHW) + { + dimC = 1; dimY = 2; dimX = 3; + } + else + { + dimY = 1; dimX = 2; dimC = 3; + } if (val.list().i_size() != 4 || - val.list().i(0) != 1 || val.list().i(3) != 1) + val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported strides"); - layerParams.set("stride_h", static_cast(val.list().i(1))); - layerParams.set("stride_w", static_cast(val.list().i(2))); + layerParams.set("stride_h", static_cast(val.list().i(dimY))); + layerParams.set("stride_w", static_cast(val.list().i(dimX))); } } @@ -277,11 +304,21 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer) if (hasLayerAttr(layer, "ksize")) { const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize"); + int dimX, dimY, dimC; + int layout = getDataLayout(layer); + if (layout == DATA_LAYOUT_NCHW) + { + dimC = 1; dimY = 2; dimX = 3; + } + else + { + dimY = 1; dimX = 2; dimC = 3; + } if (val.list().i_size() != 4 || - val.list().i(0) != 1 || val.list().i(3) != 1) + val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported ksize"); - layerParams.set("kernel_h", static_cast(val.list().i(1))); - layerParams.set("kernel_w", static_cast(val.list().i(2))); + layerParams.set("kernel_h", static_cast(val.list().i(dimY))); + layerParams.set("kernel_w", static_cast(val.list().i(dimX))); } else { @@ -375,6 +412,8 @@ private: // and may be used to build the network using binary format only as a weights storage. // This approach is similar to Caffe's `.prorotxt` and `.caffemodel`. 
tensorflow::GraphDef netTxt; + + std::vector netInputsNames; }; TFImporter::TFImporter(const char *model, const char *config) @@ -442,7 +481,14 @@ void TFImporter::connect(const std::map& layers_name_id_map, Net& n std::map::const_iterator it = layers_name_id_map.find(outPin.name); if (it == layers_name_id_map.end()) CV_Error(Error::StsError, "Input layer not found: " + outPin.name); - network.connect(it->second, outPin.blobIndex, input_layer_id, input_blob_id); + + std::vector::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name); + int blobIndex; + if (inpNameIt == netInputsNames.end()) + blobIndex = outPin.blobIndex; + else + blobIndex = inpNameIt - netInputsNames.begin(); + network.connect(it->second, blobIndex, input_layer_id, input_blob_id); } void TFImporter::connectToAllBlobs(const std::map& layer_id, Net& network, const Pin& outPin, @@ -558,41 +604,45 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map& cons } } +static inline std::string getNodeName(const std::string& tensorName) +{ + return tensorName.substr(0, tensorName.rfind(':')); +} + // If all inputs of specific layer have the same data layout we can say that // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. 
-static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map& data_layouts) +static int predictOutputDataLayout(const tensorflow::GraphDef& net, + const tensorflow::NodeDef& layer, + const std::map& data_layouts) { - if (hasLayerAttr(layer, "data_format")) - { - std::string format = getLayerAttr(layer, "data_format").s(); - if (format == "NHWC" || format == "channels_last") - return DATA_LAYOUT_NHWC; - else if (format == "NCHW" || format == "channels_first") - return DATA_LAYOUT_NCHW; - else - CV_Error(Error::StsParseError, "Unknown data_format value: " + format); - } + int layout = getDataLayout(layer); + if (layout != DATA_LAYOUT_UNKNOWN) + return layout; // Determine layout by layer's inputs - int layout = DATA_LAYOUT_UNKNOWN; std::map::const_iterator it; for (int i = 0, n = layer.input_size(); i < n; ++i) { - it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':'))); + it = data_layouts.find(getNodeName(layer.input(i))); if (it != data_layouts.end()) { - if (it->second == DATA_LAYOUT_UNKNOWN) - return DATA_LAYOUT_UNKNOWN; - else if (it->second != layout) + if (layout != DATA_LAYOUT_UNKNOWN) { - if (layout == DATA_LAYOUT_UNKNOWN) - layout = it->second; - else + if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN) return DATA_LAYOUT_UNKNOWN; } + else + layout = it->second; } } - return layout; + + if (layout != DATA_LAYOUT_UNKNOWN) + return layout; + + // Determine layout by layer's consumers recursively. + it = data_layouts.find(layer.name()); + CV_Assert(it != data_layouts.end()); + return it->second; } void TFImporter::populateNet(Net dstNet) @@ -610,6 +660,52 @@ void TFImporter::populateNet(Net dstNet) int layersSize = net.node_size(); std::map data_layouts; + // Pre-fill data layouts where they are set explicitly. 
+ // Assuming that nodes are in topological order + for (int i = net.node_size() - 1; i >= 0; --i) + { + const tensorflow::NodeDef& layer = net.node(i); + std::string name = layer.name(); + + int layout = getDataLayout(layer); + std::map::iterator it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + { + it->second = DATA_LAYOUT_UNKNOWN; + layout = DATA_LAYOUT_UNKNOWN; + } + } + else + layout = it->second; + } + else + data_layouts[name] = layout; + + // Specify input layers to have the same data layout. + for (int j = 0; j < layer.input_size(); ++j) + { + name = getNodeName(layer.input(j)); + it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + it->second = DATA_LAYOUT_UNKNOWN; + } + } + else + data_layouts[name] = layout; + } + } // find all Const layers for params std::map value_id; @@ -628,7 +724,8 @@ void TFImporter::populateNet(Net dstNet) if(layers_to_ignore.find(name) != layers_to_ignore.end()) continue; - data_layouts[name] = predictOutputDataLayout(layer, data_layouts); + int predictedLayout = predictOutputDataLayout(net, layer, data_layouts); + data_layouts[name] = predictedLayout; if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative") { @@ -778,7 +875,7 @@ void TFImporter::populateNet(Net dstNet) Pin inp = parsePin(layer.input(ii)); if (layer_id.find(inp.name) == layer_id.end()) CV_Error(Error::StsError, "Input layer not found: " + inp.name); - dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii); + connect(layer_id, dstNet, inp, id, ii); } } } @@ -852,7 +949,7 @@ void TFImporter::populateNet(Net dstNet) // one input only int input_blob_index = kernel_blob_index == 0 ? 
1 : 0; connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + data_layouts[name] = DATA_LAYOUT_PLANAR; } else if (type == "Reshape") { @@ -885,6 +982,7 @@ void TFImporter::populateNet(Net dstNet) // one input only connect(layer_id, dstNet, inpId, id, 0); + data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : DATA_LAYOUT_UNKNOWN; } else if (type == "Flatten" || type == "Squeeze") { @@ -923,7 +1021,7 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Flatten", layerParams); layer_id[name] = id; connect(layer_id, dstNet, inpId, id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + data_layouts[name] = DATA_LAYOUT_PLANAR; } else if (type == "Transpose") { @@ -1013,7 +1111,10 @@ void TFImporter::populateNet(Net dstNet) { int axisId = (type == "Concat" ? 0 : layer.input_size() - 1); int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); - layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis); + + if (data_layouts[name] == DATA_LAYOUT_NHWC) + axis = toNCHW(axis); + layerParams.set("axis", axis); int id = dstNet.addLayer(name, "Concat", layerParams); layer_id[name] = id; @@ -1028,7 +1129,7 @@ void TFImporter::populateNet(Net dstNet) Pin inp = parsePin(layer.input(ii)); if (layer_id.find(inp.name) == layer_id.end()) CV_Error(Error::StsError, "Input layer not found: " + inp.name); - dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii - from); + connect(layer_id, dstNet, inp, id, ii - from); } } else if (type == "MaxPool") @@ -1060,10 +1161,12 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Placeholder") { - std::vector netInputs(1); - netInputs[0] = name; - layer_id[name] = 0; - dstNet.setInputsNames(netInputs); + if (!hasLayerAttr(layer, "dtype") || + getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag. 
+ { + netInputsNames.push_back(name); + layer_id[name] = 0; + } } else if (type == "Split") { // TODO: determining axis index remapping by input dimensions order of input blob @@ -1201,7 +1304,7 @@ void TFImporter::populateNet(Net dstNet) Pin inp = parsePin(layer.input(ii)); if (layer_id.find(inp.name) == layer_id.end()) CV_Error(Error::StsError, "Input layer not found: " + inp.name); - dstNet.connect(layer_id.at(inp.name), inp.blobIndex, id, ii); + connect(layer_id, dstNet, inp, id, ii); } } } @@ -1719,6 +1822,7 @@ void TFImporter::populateNet(Net dstNet) } } } + dstNet.setInputsNames(netInputsNames); } } // namespace diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 2549d7d352..48fe765224 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -182,11 +182,9 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) throw SkipTestException(""); Mat sample = imread(findDataFile("dnn/street.png", false)); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); - float l1 = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 0.0007 : 0.0; - float lInf = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 0.011 : 0.0; - + float diffScores = (target == DNN_TARGET_OPENCL_FP16) ? 
6e-3 : 0.0; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", - inp, "detection_out", "", l1, lInf); + inp, "detection_out", "", diffScores); } TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow) diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index 519bf7131c..ec43f3e046 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -157,7 +157,8 @@ static inline bool checkMyriadTarget() net.addLayerToPrev("testLayer", "Identity", lp); net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); net.setPreferableTarget(cv::dnn::DNN_TARGET_MYRIAD); - net.setInput(cv::Mat::zeros(1, 1, CV_32FC1)); + static int inpDims[] = {1, 2, 3, 4}; + net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0))); try { net.forward(); diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index e28d9dc11e..2232aa4ff3 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -143,7 +143,7 @@ TEST_P(Test_Darknet_nets, YoloVoc) classIds[0] = 6; confidences[0] = 0.750469f; boxes[0] = Rect2d(0.577374, 0.127391, 0.325575, 0.173418); // a car classIds[1] = 1; confidences[1] = 0.780879f; boxes[1] = Rect2d(0.270762, 0.264102, 0.461713, 0.48131); // a bicycle classIds[2] = 11; confidences[2] = 0.901615f; boxes[2] = Rect2d(0.1386, 0.338509, 0.282737, 0.60028); // a dog - double scoreDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 7e-3 : 8e-5; + double scoreDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1e-2 : 8e-5; double iouDiff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
0.013 : 3e-5; testDarknetModel("yolo-voc.cfg", "yolo-voc.weights", outNames, classIds, confidences, boxes, backendId, targetId, scoreDiff, iouDiff); diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 3de7f61c5d..720447afb9 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -925,6 +925,10 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy) Mat out = net.forward(); normAssert(outDefault, out); + + std::vector outLayers = net.getUnconnectedOutLayers(); + ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge"); + ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat"); } // 1. Create a .prototxt file with the following network: @@ -1183,6 +1187,7 @@ TEST(Layer_Test_PoolingIndices, Accuracy) } } } + net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setInput(blobFromImage(inp)); std::vector outputs; diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 5ac8890e50..d4ffc94399 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -127,6 +127,7 @@ TEST_P(Test_TensorFlow_layers, conv) runTensorFlowNet("atrous_conv2d_same", targetId); runTensorFlowNet("depthwise_conv2d", targetId); runTensorFlowNet("keras_atrous_conv2d_same", targetId); + runTensorFlowNet("conv_pool_nchw", targetId); } TEST_P(Test_TensorFlow_layers, padding) @@ -142,9 +143,10 @@ TEST_P(Test_TensorFlow_layers, eltwise_add_mul) runTensorFlowNet("eltwise_add_mul", GetParam()); } -TEST_P(Test_TensorFlow_layers, pad_and_concat) +TEST_P(Test_TensorFlow_layers, concat) { runTensorFlowNet("pad_and_concat", GetParam()); + runTensorFlowNet("concat_axis_1", GetParam()); } TEST_P(Test_TensorFlow_layers, batch_norm) @@ -440,4 +442,20 @@ TEST(Test_TensorFlow, resize_bilinear) runTensorFlowNet("resize_bilinear_factor"); } +TEST(Test_TensorFlow, two_inputs) +{ + Net net = readNet(path("two_inputs_net.pbtxt")); + net.setPreferableBackend(DNN_BACKEND_OPENCV); + + Mat 
firstInput(2, 3, CV_32FC1), secondInput(2, 3, CV_32FC1); + randu(firstInput, -1, 1); + randu(secondInput, -1, 1); + + net.setInput(firstInput, "first_input"); + net.setInput(secondInput, "second_input"); + Mat out = net.forward(); + + normAssert(out, firstInput + secondInput); +} + } diff --git a/modules/imgcodecs/src/grfmt_sunras.cpp b/modules/imgcodecs/src/grfmt_sunras.cpp index 6398db567f..b419b7795a 100644 --- a/modules/imgcodecs/src/grfmt_sunras.cpp +++ b/modules/imgcodecs/src/grfmt_sunras.cpp @@ -175,8 +175,6 @@ bool SunRasterDecoder::readData( Mat& img ) AutoBuffer _src(src_pitch + 32); uchar* src = _src; - AutoBuffer _bgr(m_width*3 + 32); - uchar* bgr = _bgr; if( !color && m_maptype == RMT_EQUAL_RGB ) CvtPaletteToGray( m_palette, gray_palette, 1 << m_bpp ); @@ -340,16 +338,18 @@ bad_decoding_end: case 24: for( y = 0; y < m_height; y++, data += step ) { - m_strm.getBytes( color ? data : bgr, src_pitch ); + m_strm.getBytes(src, src_pitch ); if( color ) { if( m_type == RAS_FORMAT_RGB ) - icvCvt_RGB2BGR_8u_C3R( data, 0, data, 0, cvSize(m_width,1) ); + icvCvt_RGB2BGR_8u_C3R(src, 0, data, 0, cvSize(m_width,1) ); + else + memcpy(data, src, std::min(step, (size_t)src_pitch)); } else { - icvCvt_BGR2Gray_8u_C3C1R( bgr, 0, data, 0, cvSize(m_width,1), + icvCvt_BGR2Gray_8u_C3C1R(src, 0, data, 0, cvSize(m_width,1), m_type == RAS_FORMAT_RGB ? 2 : 0 ); } } diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index df6ca9a7ed..f0136b0f1c 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -670,6 +670,14 @@ public: void groupRectangles(std::vector& rectList, std::vector& weights, int groupThreshold, double eps) const; }; +/** @brief Detect QR code in image and return minimum area of quadrangle that describes QR code. + @param in Matrix of the type CV_8UC1 containing an image where QR code are detected. 
+ @param points Output vector of vertices of a quadrangle of minimal area that describes QR code. + @param eps_x Epsilon neighborhood, which allows you to determine the horizontal pattern of the scheme 1:1:3:1:1 according to QR code standard. + @param eps_y Epsilon neighborhood, which allows you to determine the vertical pattern of the scheme 1:1:3:1:1 according to QR code standard. + */ +CV_EXPORTS bool detectQRCode(InputArray in, std::vector &points, double eps_x = 0.2, double eps_y = 0.1); + //! @} objdetect } diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp new file mode 100644 index 0000000000..f7c40a744f --- /dev/null +++ b/modules/objdetect/src/qrcode.cpp @@ -0,0 +1,775 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2018, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. 
+ +#include "precomp.hpp" +#include "opencv2/objdetect.hpp" +// #include "opencv2/calib3d.hpp" + +#include +#include +#include + +namespace cv +{ +class QRDecode +{ + public: + void init(Mat src, double eps_vertical_ = 0.19, double eps_horizontal_ = 0.09); + void binarization(); + bool localization(); + bool transformation(); + Mat getBinBarcode() { return bin_barcode; } + Mat getLocalizationBarcode() { return local_barcode; } + Mat getTransformationBarcode() { return transform_barcode; } + std::vector getTransformationPoints() { return transformation_points; } + Mat getStraightBarcode() { return straight_barcode; } + protected: + std::vector searchVerticalLines(); + std::vector separateHorizontalLines(std::vector list_lines); + std::vector pointClustering(std::vector list_lines); + void fixationPoints(std::vector &local_point, std::vector &local_len); + Point getTransformationPoint(Point left, Point center, double cos_angle_rotation, + bool right_rotate = true); + Point intersectionLines(Point a1, Point a2, Point b1, Point b2); + std::vector getQuadrilateral(std::vector angle_list); + double getQuadrilateralArea(Point a, Point b, Point c, Point d); + double getCosVectors(Point a, Point b, Point c); + + Mat barcode, bin_barcode, local_barcode, transform_barcode, straight_barcode; + std::vector localization_points, transformation_points; + std::vector localization_length; + double experimental_area; + + double eps_vertical, eps_horizontal; + std::vector result; + std::vector test_lines; + uint8_t next_pixel, future_pixel; + double length, weight; +}; + +void QRDecode::init(Mat src, double eps_vertical_, double eps_horizontal_) +{ + barcode = src; + eps_vertical = eps_vertical_; + eps_horizontal = eps_horizontal_; +} + +void QRDecode::binarization() +{ + Mat filter_barcode; + GaussianBlur(barcode, filter_barcode, Size(3, 3), 0); + threshold(filter_barcode, bin_barcode, 0, 255, THRESH_BINARY + THRESH_OTSU); +} + +bool QRDecode::localization() +{ + 
cvtColor(bin_barcode, local_barcode, COLOR_GRAY2RGB); + Point begin, end; + + std::vector list_lines_x = searchVerticalLines(); + std::vector list_lines_y = separateHorizontalLines(list_lines_x); + std::vector result_point = pointClustering(list_lines_y); + for (int i = 0; i < 3; i++) + { + localization_points.push_back( + Point(static_cast(result_point[i][0]), + static_cast(result_point[i][1] + result_point[i][2]))); + localization_length.push_back(result_point[i][2]); + } + + fixationPoints(localization_points, localization_length); + + + if (localization_points.size() != 3) { return false; } + return true; + +} + +std::vector QRDecode::searchVerticalLines() +{ + result.clear(); + int temp_length = 0; + + for (int x = 0; x < bin_barcode.rows; x++) + { + for (int y = 0; y < bin_barcode.cols; y++) + { + if (bin_barcode.at(x, y) > 0) { continue; } + + // --------------- Search vertical lines --------------- // + + test_lines.clear(); + future_pixel = 255; + + for (int i = x; i < bin_barcode.rows - 1; i++) + { + next_pixel = bin_barcode.at(i + 1, y); + temp_length++; + if (next_pixel == future_pixel) + { + future_pixel = 255 - future_pixel; + test_lines.push_back(temp_length); + temp_length = 0; + if (test_lines.size() == 5) { break; } + } + } + + // --------------- Compute vertical lines --------------- // + + if (test_lines.size() == 5) + { + length = 0.0; weight = 0.0; + + for (size_t i = 0; i < test_lines.size(); i++) { length += test_lines[i]; } + + for (size_t i = 0; i < test_lines.size(); i++) + { + if (i == 2) { weight += abs((test_lines[i] / length) - 3.0/7.0); } + else { weight += abs((test_lines[i] / length) - 1.0/7.0); } + } + + if (weight < eps_vertical) + { + Vec3d line; + line[0] = x; line[1] = y, line[2] = length; + result.push_back(line); + } + } + } + } + return result; +} + +std::vector QRDecode::separateHorizontalLines(std::vector list_lines) +{ + result.clear(); + int temp_length = 0; + int x, y; + + for (size_t pnt = 0; pnt < list_lines.size(); 
pnt++) + { + x = static_cast(list_lines[pnt][0] + list_lines[pnt][2] / 2); + y = static_cast(list_lines[pnt][1]); + + // --------------- Search horizontal up-lines --------------- // + test_lines.clear(); + future_pixel = 255; + + for (int j = y; j < bin_barcode.cols - 1; j++) + { + next_pixel = bin_barcode.at(x, j + 1); + temp_length++; + if (next_pixel == future_pixel) + { + future_pixel = 255 - future_pixel; + test_lines.push_back(temp_length); + temp_length = 0; + if (test_lines.size() == 3) { break; } + } + } + + // --------------- Search horizontal down-lines --------------- // + future_pixel = 255; + + for (int j = y; j >= 1; j--) + { + next_pixel = bin_barcode.at(x, j - 1); + temp_length++; + if (next_pixel == future_pixel) + { + future_pixel = 255 - future_pixel; + test_lines.push_back(temp_length); + temp_length = 0; + if (test_lines.size() == 6) { break; } + } + } + + // --------------- Compute horizontal lines --------------- // + + if (test_lines.size() == 6) + { + length = 0.0; weight = 0.0; + + for (size_t i = 0; i < test_lines.size(); i++) { length += test_lines[i]; } + + for (size_t i = 0; i < test_lines.size(); i++) + { + if (i % 3 == 0) { weight += abs((test_lines[i] / length) - 3.0/14.0); } + else { weight += abs((test_lines[i] / length) - 1.0/ 7.0); } + } + } + + if(weight < eps_horizontal) + { + result.push_back(list_lines[pnt]); + } + } + return result; +} + +std::vector QRDecode::pointClustering(std::vector list_lines) +{ + std::vector centers; + std::vector clusters[3]; + double weight_clusters[3] = {0.0, 0.0, 0.0}; + Point basis[3], temp_pnt; + double temp_norm = 0.0, temp_compute_norm, distance[3]; + + basis[0] = Point(static_cast(list_lines[0][1]), static_cast(list_lines[0][0])); + for (size_t i = 1; i < list_lines.size(); i++) + { + temp_pnt = Point(static_cast(list_lines[i][1]), static_cast(list_lines[i][0])); + temp_compute_norm = norm(basis[0] - temp_pnt); + if (temp_norm < temp_compute_norm) + { + basis[1] = temp_pnt; + temp_norm = 
temp_compute_norm; + } + } + + for (size_t i = 1; i < list_lines.size(); i++) + { + temp_pnt = Point(static_cast(list_lines[i][1]), static_cast(list_lines[i][0])); + temp_compute_norm = norm(basis[0] - temp_pnt) + norm(basis[1] - temp_pnt); + if (temp_norm < temp_compute_norm) + { + basis[2] = temp_pnt; + temp_norm = temp_compute_norm; + } + } + + for (size_t i = 0; i < list_lines.size(); i++) + { + temp_pnt = Point(static_cast(list_lines[i][1]), static_cast(list_lines[i][0])); + distance[0] = norm(basis[0] - temp_pnt); + distance[1] = norm(basis[1] - temp_pnt); + distance[2] = norm(basis[2] - temp_pnt); + if (distance[0] < distance[1] && distance[0] < distance[2]) + { + clusters[0].push_back(temp_pnt); + weight_clusters[0] += list_lines[i][2]; + } + else if (distance[1] < distance[0] && distance[1] < distance[2]) + { + clusters[1].push_back(temp_pnt); + weight_clusters[1] += list_lines[i][2]; + } + else + { + clusters[2].push_back(temp_pnt); + weight_clusters[2] += list_lines[i][2]; + } + } + + for (int i = 0; i < 3; i++) + { + basis[i] = Point(0, 0); + for (size_t j = 0; j < clusters[i].size(); j++) { basis[i] += clusters[i][j]; } + basis[i] = basis[i] / static_cast(clusters[i].size()); + weight = weight_clusters[i] / (2 * clusters[i].size()); + centers.push_back(Vec3d(basis[i].x, basis[i].y, weight)); + } + + return centers; +} + +void QRDecode::fixationPoints(std::vector &local_point, std::vector &local_len) +{ + double cos_angles[3], norm_triangl[3]; + + norm_triangl[0] = norm(local_point[1] - local_point[2]); + norm_triangl[1] = norm(local_point[0] - local_point[2]); + norm_triangl[2] = norm(local_point[1] - local_point[0]); + + cos_angles[0] = (pow(norm_triangl[1], 2) + pow(norm_triangl[2], 2) - pow(norm_triangl[0], 2)) + / (2 * norm_triangl[1] * norm_triangl[2]); + cos_angles[1] = (pow(norm_triangl[0], 2) + pow(norm_triangl[2], 2) - pow(norm_triangl[1], 2)) + / (2 * norm_triangl[0] * norm_triangl[2]); + cos_angles[2] = (pow(norm_triangl[0], 2) + 
pow(norm_triangl[1], 2) - pow(norm_triangl[2], 2)) + / (2 * norm_triangl[0] * norm_triangl[1]); + + int i_min_cos = + (cos_angles[0] < cos_angles[1] && cos_angles[0] < cos_angles[2]) ? 0 : + (cos_angles[1] < cos_angles[0] && cos_angles[1] < cos_angles[2]) ? 1 : 2; + + Point temp_pnt; + double tmp_len; + temp_pnt = local_point[0]; + tmp_len = local_len[0]; + local_point[0] = local_point[i_min_cos]; + local_len[0] = local_len[i_min_cos]; + local_point[i_min_cos] = temp_pnt; + local_len[i_min_cos] = tmp_len; + + Mat vector_mult(Size(3, 3), CV_32FC1); + vector_mult.at(0, 0) = 1; + vector_mult.at(1, 0) = 1; + vector_mult.at(2, 0) = 1; + vector_mult.at(0, 1) = static_cast((local_point[1] - local_point[0]).x); + vector_mult.at(1, 1) = static_cast((local_point[1] - local_point[0]).y); + vector_mult.at(0, 2) = static_cast((local_point[2] - local_point[0]).x); + vector_mult.at(1, 2) = static_cast((local_point[2] - local_point[0]).y); + double res_vect_mult = determinant(vector_mult); + if (res_vect_mult < 0) + { + temp_pnt = local_point[1]; + tmp_len = local_len[1]; + local_point[1] = local_point[2]; + local_len[1] = local_len[2]; + local_point[2] = temp_pnt; + local_len[2] = tmp_len; + } +} + +bool QRDecode::transformation() +{ + cvtColor(bin_barcode, transform_barcode, COLOR_GRAY2RGB); + if (localization_points.size() != 3) { return false; } + + Point red = localization_points[0]; + Point green = localization_points[1]; + Point blue = localization_points[2]; + Point adj_b_r_pnt, adj_r_b_pnt, adj_g_r_pnt, adj_r_g_pnt; + Point line_r_b_pnt, line_r_g_pnt, norm_r_b_pnt, norm_r_g_pnt; + adj_b_r_pnt = getTransformationPoint(blue, red, -1); + adj_r_b_pnt = getTransformationPoint(red, blue, -1); + adj_g_r_pnt = getTransformationPoint(green, red, -1); + adj_r_g_pnt = getTransformationPoint(red, green, -1); + line_r_b_pnt = getTransformationPoint(red, blue, -0.91); + line_r_g_pnt = getTransformationPoint(red, green, -0.91); + norm_r_b_pnt = getTransformationPoint(red, blue, 0.0, 
true); + norm_r_g_pnt = getTransformationPoint(red, green, 0.0, false); + + transformation_points.push_back(intersectionLines( + adj_r_g_pnt, line_r_g_pnt, adj_r_b_pnt, line_r_b_pnt)); + transformation_points.push_back(intersectionLines( + adj_b_r_pnt, norm_r_g_pnt, adj_r_g_pnt, line_r_g_pnt)); + transformation_points.push_back(intersectionLines( + norm_r_b_pnt, adj_g_r_pnt, adj_b_r_pnt, norm_r_g_pnt)); + transformation_points.push_back(intersectionLines( + norm_r_b_pnt, adj_g_r_pnt, adj_r_b_pnt, line_r_b_pnt)); + + experimental_area = getQuadrilateralArea(transformation_points[0], + transformation_points[1], + transformation_points[2], + transformation_points[3]); + std::vector quadrilateral = getQuadrilateral(transformation_points); + transformation_points = quadrilateral; + + int max_length_norm = -1; + size_t transform_size = transformation_points.size(); + for (size_t i = 0; i < transform_size; i++) + { + int len_norm = static_cast(norm(transformation_points[i % transform_size] - + transformation_points[(i + 1) % transform_size])); + if (max_length_norm < len_norm) { max_length_norm = len_norm; } + } + + std::vector perspective_points; + perspective_points.push_back(Point(0, 0)); + perspective_points.push_back(Point(0, max_length_norm)); + perspective_points.push_back(Point(max_length_norm, max_length_norm)); + perspective_points.push_back(Point(max_length_norm, 0)); + + // warpPerspective(bin_barcode, straight_barcode, + // findHomography(transformation_points, perspective_points), + // Size(max_length_norm, max_length_norm)); + return true; +} + +Point QRDecode::getTransformationPoint(Point left, Point center, double cos_angle_rotation, + bool right_rotate) +{ + Point temp_pnt, prev_pnt(0, 0), next_pnt, start_pnt(center); + double temp_delta, min_delta; + int steps = 0; + + future_pixel = 255; + while(true) + { + min_delta = std::numeric_limits::max(); + for (int i = -1; i < 2; i++) + { + for (int j = -1; j < 2; j++) + { + if (i == 0 && j == 0) { continue; } 
+ temp_pnt = Point(start_pnt.x + i, start_pnt.y + j); + temp_delta = abs(getCosVectors(left, center, temp_pnt) - cos_angle_rotation); + if (temp_delta < min_delta && prev_pnt != temp_pnt) + { + next_pnt = temp_pnt; + min_delta = temp_delta; + } + } + } + prev_pnt = start_pnt; + start_pnt = next_pnt; + next_pixel = bin_barcode.at(start_pnt.y, start_pnt.x); + if (next_pixel == future_pixel) + { + future_pixel = 255 - future_pixel; + steps++; + if (steps == 3) { break; } + } + } + + if (cos_angle_rotation == 0.0) + { + Mat vector_mult(Size(3, 3), CV_32FC1); + vector_mult.at(0, 0) = 1; + vector_mult.at(1, 0) = 1; + vector_mult.at(2, 0) = 1; + vector_mult.at(0, 1) = static_cast((left - center).x); + vector_mult.at(1, 1) = static_cast((left - center).y); + vector_mult.at(0, 2) = static_cast((left - start_pnt).x); + vector_mult.at(1, 2) = static_cast((left - start_pnt).y); + double res_vect_mult = determinant(vector_mult); + if (( right_rotate && res_vect_mult < 0) || + (!right_rotate && res_vect_mult > 0)) + { + start_pnt = getTransformationPoint(start_pnt, center, -1); + } + } + + return start_pnt; +} + +Point QRDecode::intersectionLines(Point a1, Point a2, Point b1, Point b2) +{ + Point result_square_angle( + static_cast( + static_cast + ((a1.x * a2.y - a1.y * a2.x) * (b1.x - b2.x) - + (b1.x * b2.y - b1.y * b2.x) * (a1.x - a2.x)) / + ((a1.x - a2.x) * (b1.y - b2.y) - + (a1.y - a2.y) * (b1.x - b2.x))), + static_cast( + static_cast + ((a1.x * a2.y - a1.y * a2.x) * (b1.y - b2.y) - + (b1.x * b2.y - b1.y * b2.x) * (a1.y - a2.y)) / + ((a1.x - a2.x) * (b1.y - b2.y) - + (a1.y - a2.y) * (b1.x - b2.x))) + ); + return result_square_angle; +} + +std::vector QRDecode::getQuadrilateral(std::vector angle_list) +{ + size_t angle_size = angle_list.size(); + uint8_t value, mask_value; + Mat mask(bin_barcode.rows + 2, bin_barcode.cols + 2, CV_8UC1); + for (size_t i = 0; i < angle_size; i++) + { + LineIterator line_iter(bin_barcode, angle_list[ i % angle_size], + angle_list[(i + 1) % 
angle_size]); + for(int j = 0; j < line_iter.count; j++, ++line_iter) + { + value = bin_barcode.at(line_iter.pos()); + mask_value = mask.at(line_iter.pos() + Point(1, 1)); + if (value == 0 && mask_value == 0) + { + floodFill(bin_barcode, mask, line_iter.pos(), 255); + } + } + } + std::vector locations; + Mat mask_roi = mask(Range(1, bin_barcode.rows - 1), + Range(1, bin_barcode.cols - 1)); + + cv::findNonZero(mask_roi, locations); + + for (size_t i = 0; i < angle_list.size(); i++) + { + locations.push_back(angle_list[i]); + } + + std::vector< std::vector > hull(1), approx_hull(1); + convexHull(Mat(locations), hull[0]); + int hull_size = static_cast(hull[0].size()); + + Point min_pnt; + + std::vector min_abc; + double min_abs_cos_abc, abs_cos_abc; + for (int count = 0; count < 4; count++) + { + min_abs_cos_abc = std::numeric_limits::max(); + for (int i = 0; i < hull_size; i++) + { + Point a = hull[0][ i % hull_size]; + Point b = hull[0][(i + 1) % hull_size]; + Point c = hull[0][(i + 2) % hull_size]; + abs_cos_abc = abs(getCosVectors(a, b, c)); + + bool flag_detect = true; + for (size_t j = 0; j < min_abc.size(); j++) + { + if (min_abc[j] == b) { flag_detect = false; break; } + } + + if (flag_detect && (abs_cos_abc < min_abs_cos_abc)) + { + min_pnt = b; + min_abs_cos_abc = abs_cos_abc; + } + } + min_abc.push_back(min_pnt); + } + + + int min_abc_size = static_cast(min_abc.size()); + std::vector index_min_abc(min_abc_size); + for (int i = 0; i < min_abc_size; i++) + { + for (int j = 0; j < hull_size; j++) + { + if (hull[0][j] == min_abc[i]) { index_min_abc[i] = j; break; } + } + } + + std::vector result_hull_point(angle_size); + double min_norm, temp_norm; + for (size_t i = 0; i < angle_size; i++) + { + min_norm = std::numeric_limits::max(); + Point closest_pnt; + for (int j = 0; j < min_abc_size; j++) + { + if (min_norm > norm(hull[0][index_min_abc[j]] - angle_list[i])) + { + min_norm = norm(hull[0][index_min_abc[j]] - angle_list[i]); + closest_pnt = 
hull[0][index_min_abc[j]]; + } + } + result_hull_point[i] = closest_pnt; + } + + int start_line[2] = {0, 0}, finish_line[2] = {0, 0}, unstable_pnt = 0; + for (int i = 0; i < hull_size; i++) + { + if (result_hull_point[3] == hull[0][i]) { start_line[0] = i; } + if (result_hull_point[2] == hull[0][i]) { finish_line[0] = start_line[1] = i; } + if (result_hull_point[1] == hull[0][i]) { finish_line[1] = i; } + if (result_hull_point[0] == hull[0][i]) { unstable_pnt = i; } + } + + int index_hull, extra_index_hull, next_index_hull, extra_next_index_hull, count_points; + Point result_side_begin[4], result_side_end[4]; + + min_norm = std::numeric_limits::max(); + index_hull = start_line[0]; + count_points = abs(start_line[0] - finish_line[0]); + do + { + if (count_points > hull_size / 2) { next_index_hull = index_hull + 1; } + else { next_index_hull = index_hull - 1; } + + if (next_index_hull == hull_size) { next_index_hull = 0; } + if (next_index_hull == -1) { next_index_hull = hull_size - 1; } + + Point angle_closest_pnt = norm(hull[0][index_hull] - angle_list[2]) > + norm(hull[0][index_hull] - angle_list[3]) ? 
angle_list[3] : angle_list[2]; + + Point intrsc_line_hull = + intersectionLines(hull[0][index_hull], hull[0][next_index_hull], + angle_list[2], angle_list[3]); + temp_norm = getCosVectors(hull[0][index_hull], intrsc_line_hull, angle_closest_pnt); + if (min_norm > temp_norm && + norm(hull[0][index_hull] - hull[0][next_index_hull]) > + norm(angle_list[2] - angle_list[3]) / 10) + { + min_norm = temp_norm; + result_side_begin[0] = hull[0][index_hull]; + result_side_end[0] = hull[0][next_index_hull]; + } + + + index_hull = next_index_hull; + } + while(index_hull != finish_line[0]); + + if (min_norm == std::numeric_limits::max()) + { + result_side_begin[0] = angle_list[2]; + result_side_end[0] = angle_list[3]; + } + + min_norm = std::numeric_limits::max(); + index_hull = start_line[1]; + count_points = abs(start_line[1] - finish_line[1]); + do + { + if (count_points > hull_size / 2) { next_index_hull = index_hull + 1; } + else { next_index_hull = index_hull - 1; } + + if (next_index_hull == hull_size) { next_index_hull = 0; } + if (next_index_hull == -1) { next_index_hull = hull_size - 1; } + + Point angle_closest_pnt = norm(hull[0][index_hull] - angle_list[1]) > + norm(hull[0][index_hull] - angle_list[2]) ? 
angle_list[2] : angle_list[1]; + + Point intrsc_line_hull = + intersectionLines(hull[0][index_hull], hull[0][next_index_hull], + angle_list[1], angle_list[2]); + temp_norm = getCosVectors(hull[0][index_hull], intrsc_line_hull, angle_closest_pnt); + if (min_norm > temp_norm && + norm(hull[0][index_hull] - hull[0][next_index_hull]) > + norm(angle_list[1] - angle_list[2]) / 20) + { + min_norm = temp_norm; + result_side_begin[1] = hull[0][index_hull]; + result_side_end[1] = hull[0][next_index_hull]; + } + + + index_hull = next_index_hull; + } + while(index_hull != finish_line[1]); + + if (min_norm == std::numeric_limits::max()) + { + result_side_begin[1] = angle_list[1]; + result_side_end[1] = angle_list[2]; + } + + double test_norm[4] = { 0.0, 0.0, 0.0, 0.0 }; + int test_index[4]; + for (int i = 0; i < 4; i++) + { + test_index[i] = (i < 2) ? static_cast(start_line[0]) + : static_cast(finish_line[1]); + do + { + next_index_hull = ((i + 1) % 2 != 0) ? test_index[i] + 1 : test_index[i] - 1; + if (next_index_hull == hull_size) { next_index_hull = 0; } + if (next_index_hull == -1) { next_index_hull = hull_size - 1; } + test_norm[i] += norm(hull[0][next_index_hull] - hull[0][unstable_pnt]); + test_index[i] = next_index_hull; + } + while(test_index[i] != unstable_pnt); + } + + std::vector result_angle_list(4), test_result_angle_list(4); + double min_area = std::numeric_limits::max(), test_area; + index_hull = start_line[0]; + do + { + if (test_norm[0] < test_norm[1]) { next_index_hull = index_hull + 1; } + else { next_index_hull = index_hull - 1; } + + if (next_index_hull == hull_size) { next_index_hull = 0; } + if (next_index_hull == -1) { next_index_hull = hull_size - 1; } + + extra_index_hull = finish_line[1]; + do + { + if (test_norm[2] < test_norm[3]) { extra_next_index_hull = extra_index_hull + 1; } + else { extra_next_index_hull = extra_index_hull - 1; } + + if (extra_next_index_hull == hull_size) { extra_next_index_hull = 0; } + if (extra_next_index_hull == -1) { 
extra_next_index_hull = hull_size - 1; } + + test_result_angle_list[0] + = intersectionLines(result_side_begin[0], result_side_end[0], + result_side_begin[1], result_side_end[1]); + test_result_angle_list[1] + = intersectionLines(result_side_begin[1], result_side_end[1], + hull[0][extra_index_hull], hull[0][extra_next_index_hull]); + test_result_angle_list[2] + = intersectionLines(hull[0][extra_index_hull], hull[0][extra_next_index_hull], + hull[0][index_hull], hull[0][next_index_hull]); + test_result_angle_list[3] + = intersectionLines(hull[0][index_hull], hull[0][next_index_hull], + result_side_begin[0], result_side_end[0]); + test_area = getQuadrilateralArea(test_result_angle_list[0], + test_result_angle_list[1], + test_result_angle_list[2], + test_result_angle_list[3]); + if (min_area > test_area) + { + min_area = test_area; + for (size_t i = 0; i < test_result_angle_list.size(); i++) + { + result_angle_list[i] = test_result_angle_list[i]; + } + } + + extra_index_hull = extra_next_index_hull; + } + while(extra_index_hull != unstable_pnt); + + index_hull = next_index_hull; + } + while(index_hull != unstable_pnt); + + if (norm(result_angle_list[0] - angle_list[2]) > + norm(angle_list[2] - angle_list[1]) / 3) { result_angle_list[0] = angle_list[2]; } + + if (norm(result_angle_list[1] - angle_list[1]) > + norm(angle_list[1] - angle_list[0]) / 3) { result_angle_list[1] = angle_list[1]; } + + if (norm(result_angle_list[2] - angle_list[0]) > + norm(angle_list[0] - angle_list[3]) / 3) { result_angle_list[2] = angle_list[0]; } + + if (norm(result_angle_list[3] - angle_list[3]) > + norm(angle_list[3] - angle_list[2]) / 3) { result_angle_list[3] = angle_list[3]; } + + + + return result_angle_list; +} + +// b __________ c +// / | +// / | +// / S | +// / | +// a --------------- d + +double QRDecode::getQuadrilateralArea(Point a, Point b, Point c, Point d) +{ + double length_sides[4], perimeter = 0.0, result_area = 1.0; + length_sides[0] = norm(a - b); length_sides[1] = 
norm(b - c); + length_sides[2] = norm(c - d); length_sides[3] = norm(d - a); + + for (int i = 0; i < 4; i++) { perimeter += length_sides[i]; } + perimeter /= 2; + + for (int i = 0; i < 4; i++) + { + result_area *= (perimeter - length_sides[i]); + } + + result_area = sqrt(result_area); + + return result_area; +} + +// / | b +// / | +// / | +// a/ | c + +double QRDecode::getCosVectors(Point a, Point b, Point c) +{ + return ((a - b).x * (c - b).x + (a - b).y * (c - b).y) / (norm(a - b) * norm(c - b)); +} + +CV_EXPORTS bool detectQRCode(InputArray in, std::vector &points, double eps_x, double eps_y) +{ + CV_Assert(in.isMat()); + CV_Assert(in.getMat().type() == CV_8UC1); + QRDecode qrdec; + qrdec.init(in.getMat(), eps_x, eps_y); + qrdec.binarization(); + if (!qrdec.localization()) { return false; } + if (!qrdec.transformation()) { return false; } + points = qrdec.getTransformationPoints(); + return true; +} + +} diff --git a/modules/objdetect/test/test_qrcode.cpp b/modules/objdetect/test/test_qrcode.cpp new file mode 100644 index 0000000000..87f5ce525b --- /dev/null +++ b/modules/objdetect/test/test_qrcode.cpp @@ -0,0 +1,74 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +TEST(Objdetect_QRCode, regression) +{ + String root = cvtest::TS::ptr()->get_data_path() + "qrcode/"; + // String cascades[] = + // { + // root + "haarcascade_frontalface_alt.xml", + // root + "lbpcascade_frontalface.xml", + // String() + // }; + + // vector objects; + // RNG rng((uint64)-1); + + // for( int i = 0; !cascades[i].empty(); i++ ) + // { + // printf("%d. 
%s\n", i, cascades[i].c_str()); + // CascadeClassifier cascade(cascades[i]); + // for( int j = 0; j < 100; j++ ) + // { + // int width = rng.uniform(1, 100); + // int height = rng.uniform(1, 100); + // Mat img(height, width, CV_8U); + // randu(img, 0, 256); + // cascade.detectMultiScale(img, objects); + // } + // } +} + +}} // namespace diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp index d397ac7aaf..8e01d1645e 100644 --- a/modules/video/include/opencv2/video/tracking.hpp +++ b/modules/video/include/opencv2/video/tracking.hpp @@ -250,7 +250,9 @@ when fullAffine=false. @sa estimateAffine2D, estimateAffinePartial2D, getAffineTransform, getPerspectiveTransform, findHomography */ -CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine ); +CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine); +CV_EXPORTS_W Mat estimateRigidTransform( InputArray src, InputArray dst, bool fullAffine, int ransacMaxIters, double ransacGoodRatio, + int ransacSize0); enum diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index d65f82da1e..6437443459 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -1402,7 +1402,7 @@ namespace cv { static void -getRTMatrix( const Point2f* a, const Point2f* b, +getRTMatrix( const std::vector a, const std::vector b, int count, Mat& M, bool fullAffine ) { CV_Assert( M.isContinuous() ); @@ -1478,6 +1478,12 @@ getRTMatrix( const Point2f* a, const Point2f* b, } cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine ) +{ + return estimateRigidTransform(src1, src2, fullAffine, 500, 0.5, 3); +} + +cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine, int ransacMaxIters, double ransacGoodRatio, + const int ransacSize0) { CV_INSTRUMENT_REGION() @@ -1485,9 +1491,6 @@ cv::Mat cv::estimateRigidTransform( 
InputArray src1, InputArray src2, bool fullA const int COUNT = 15; const int WIDTH = 160, HEIGHT = 120; - const int RANSAC_MAX_ITERS = 500; - const int RANSAC_SIZE0 = 3; - const double RANSAC_GOOD_RATIO = 0.5; std::vector pA, pB; std::vector good_idx; @@ -1499,6 +1502,12 @@ cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullA RNG rng((uint64)-1); int good_count = 0; + if( ransacSize0 < 3 ) + CV_Error( Error::StsBadArg, "ransacSize0 should have value bigger than 2."); + + if( ransacGoodRatio > 1 || ransacGoodRatio < 0) + CV_Error( Error::StsBadArg, "ransacGoodRatio should have value between 0 and 1"); + if( A.size() != B.size() ) CV_Error( Error::StsUnmatchedSizes, "Both input images must have the same size" ); @@ -1587,23 +1596,23 @@ cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullA good_idx.resize(count); - if( count < RANSAC_SIZE0 ) + if( count < ransacSize0 ) return Mat(); Rect brect = boundingRect(pB); + std::vector a(ransacSize0); + std::vector b(ransacSize0); + // RANSAC stuff: // 1. 
find the consensus - for( k = 0; k < RANSAC_MAX_ITERS; k++ ) + for( k = 0; k < ransacMaxIters; k++ ) { - int idx[RANSAC_SIZE0]; - Point2f a[RANSAC_SIZE0]; - Point2f b[RANSAC_SIZE0]; - - // choose random 3 non-coplanar points from A & B - for( i = 0; i < RANSAC_SIZE0; i++ ) + std::vector idx(ransacSize0); + // choose random 3 non-coplanar points from A & B + for( i = 0; i < ransacSize0; i++ ) { - for( k1 = 0; k1 < RANSAC_MAX_ITERS; k1++ ) + for( k1 = 0; k1 < ransacMaxIters; k1++ ) { idx[i] = rng.uniform(0, count); @@ -1623,7 +1632,7 @@ cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullA if( j < i ) continue; - if( i+1 == RANSAC_SIZE0 ) + if( i+1 == ransacSize0 ) { // additional check for non-complanar vectors a[0] = pA[idx[0]]; @@ -1647,11 +1656,11 @@ cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullA break; } - if( k1 >= RANSAC_MAX_ITERS ) + if( k1 >= ransacMaxIters ) break; } - if( i < RANSAC_SIZE0 ) + if( i < ransacSize0 ) continue; // estimate the transformation using 3 points @@ -1665,11 +1674,11 @@ cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullA good_idx[good_count++] = i; } - if( good_count >= count*RANSAC_GOOD_RATIO ) + if( good_count >= count*ransacGoodRatio ) break; } - if( k >= RANSAC_MAX_ITERS ) + if( k >= ransacMaxIters ) return Mat(); if( good_count < count ) @@ -1682,7 +1691,7 @@ cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullA } } - getRTMatrix( &pA[0], &pB[0], good_count, M, fullAffine ); + getRTMatrix( pA, pB, good_count, M, fullAffine ); M.at(0, 2) /= scale; M.at(1, 2) /= scale; diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 501a0172c5..61240bdf3c 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -83,6 +83,21 @@ #pragma comment(lib, "Mfreadwrite") #ifdef HAVE_DXVA #pragma comment(lib, "d3d11") +// MFCreateDXGIDeviceManager() is available since Win8 
only. +// To avoid OpenCV loading failure on Win7 use dynamic detection of this symbol. +// Details: https://github.com/opencv/opencv/issues/11858 +typedef HRESULT (*FN_MFCreateDXGIDeviceManager)(UINT *resetToken, IMFDXGIDeviceManager **ppDeviceManager); +static bool pMFCreateDXGIDeviceManager_initialized = false; +static FN_MFCreateDXGIDeviceManager pMFCreateDXGIDeviceManager = NULL; +static void init_MFCreateDXGIDeviceManager() +{ + HMODULE h = LoadLibraryA("mfplat.dll"); + if (h) + { + pMFCreateDXGIDeviceManager = (FN_MFCreateDXGIDeviceManager)GetProcAddress(h, "MFCreateDXGIDeviceManager"); + } + pMFCreateDXGIDeviceManager_initialized = true; +} #endif #if (WINVER >= 0x0602) // Available since Win 8 #pragma comment(lib, "MinCore_Downlevel") @@ -93,6 +108,8 @@ #include +#include // QISearch + struct IMFMediaType; struct IMFActivate; struct IMFMediaSource; @@ -595,6 +612,77 @@ void MediaType::Clear() } +class SourceReaderCB : public IMFSourceReaderCallback +{ +public: + SourceReaderCB() : + m_nRefCount(1), m_hEvent(CreateEvent(NULL, FALSE, FALSE, NULL)), m_bEOS(FALSE), m_hrStatus(S_OK), m_dwStreamIndex(0) + { + } + + // IUnknown methods + STDMETHODIMP QueryInterface(REFIID iid, void** ppv) CV_OVERRIDE + { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4838) +#endif + static const QITAB qit[] = + { + QITABENT(SourceReaderCB, IMFSourceReaderCallback), + { 0 }, + }; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + return QISearch(this, qit, iid, ppv); + } + STDMETHODIMP_(ULONG) AddRef() CV_OVERRIDE + { + return InterlockedIncrement(&m_nRefCount); + } + STDMETHODIMP_(ULONG) Release() CV_OVERRIDE + { + ULONG uCount = InterlockedDecrement(&m_nRefCount); + if (uCount == 0) + { + delete this; + } + return uCount; + } + + STDMETHODIMP OnReadSample(HRESULT hrStatus, DWORD dwStreamIndex, DWORD dwStreamFlags, LONGLONG llTimestamp, IMFSample *pSample) CV_OVERRIDE; + STDMETHODIMP OnEvent(DWORD, IMFMediaEvent *) CV_OVERRIDE + { + return S_OK; + } + STDMETHODIMP 
OnFlush(DWORD) CV_OVERRIDE + { + return S_OK; + } + + HRESULT Wait(DWORD dwMilliseconds, _ComPtr& videoSample, BOOL& pbEOS); + +private: + // Destructor is private. Caller should call Release. + virtual ~SourceReaderCB() + { + CV_LOG_WARNING(NULL, "terminating async callback"); + } + +public: + long m_nRefCount; // Reference count. + cv::Mutex m_mutex; + HANDLE m_hEvent; + BOOL m_bEOS; + HRESULT m_hrStatus; + + _ComPtr m_reader; + DWORD m_dwStreamIndex; + _ComPtr m_lastSample; +}; + + /******* Capturing video from camera or file via Microsoft Media Foundation **********/ class CvCapture_MSMF : public cv::IVideoCapture { @@ -604,8 +692,6 @@ public: MODE_HW = 1 } MSMFCapture_Mode; CvCapture_MSMF(); - CvCapture_MSMF(int); - CvCapture_MSMF(const cv::String&); virtual ~CvCapture_MSMF(); virtual bool open(int); virtual bool open(const cv::String&); @@ -643,6 +729,7 @@ protected: _ComPtr videoSample; LONGLONG sampleTime; bool isOpen; + _ComPtr readCallback; // non-NULL for "live" streams (camera capture) }; CvCapture_MSMF::CvCapture_MSMF(): @@ -667,8 +754,6 @@ CvCapture_MSMF::CvCapture_MSMF(): { configureHW(true); } -CvCapture_MSMF::CvCapture_MSMF(int index) : CvCapture_MSMF() { open(index); } -CvCapture_MSMF::CvCapture_MSMF(const cv::String& _filename) : CvCapture_MSMF() { open(_filename); } CvCapture_MSMF::~CvCapture_MSMF() { @@ -686,6 +771,7 @@ void CvCapture_MSMF::close() camid = -1; filename.clear(); } + readCallback.Release(); } bool CvCapture_MSMF::configureHW(bool enable) @@ -693,6 +779,10 @@ bool CvCapture_MSMF::configureHW(bool enable) #ifdef HAVE_DXVA if ((enable && D3DMgr && D3DDev) || (!enable && !D3DMgr && !D3DDev)) return true; + if (!pMFCreateDXGIDeviceManager_initialized) + init_MFCreateDXGIDeviceManager(); + if (enable && !pMFCreateDXGIDeviceManager) + return false; bool reopen = isOpen; int prevcam = camid; @@ -713,7 +803,7 @@ bool CvCapture_MSMF::configureHW(bool enable) { D3DDevMT->SetMultithreadProtected(TRUE); D3DDevMT.Release(); - if 
(SUCCEEDED(MFCreateDXGIDeviceManager(&mgrRToken, &D3DMgr))) + if (SUCCEEDED(pMFCreateDXGIDeviceManager(&mgrRToken, &D3DMgr))) { if (SUCCEEDED(D3DMgr->ResetDevice(D3DDev.Get(), mgrRToken))) { @@ -854,7 +944,8 @@ bool CvCapture_MSMF::configureOutput(UINT32 width, UINT32 height, double prefFra bool CvCapture_MSMF::open(int _index) { close(); - + if (_index < 0) + return false; _ComPtr msAttr = NULL; if (SUCCEEDED(MFCreateAttributes(&msAttr, 1)) && SUCCEEDED(msAttr->SetGUID( @@ -868,7 +959,6 @@ bool CvCapture_MSMF::open(int _index) { if (count > 0) { - _index = std::min(std::max(0, _index), (int)count - 1); for (int ind = 0; ind < (int)count; ind++) { if (ind == _index && ppDevices[ind]) @@ -887,6 +977,14 @@ bool CvCapture_MSMF::open(int _index) if (D3DMgr) srAttr->SetUnknown(MF_SOURCE_READER_D3D_MANAGER, D3DMgr.Get()); #endif + readCallback = ComPtr(new SourceReaderCB()); + HRESULT hr = srAttr->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK, (IMFSourceReaderCallback*)readCallback.Get()); + if (FAILED(hr)) + { + readCallback.Release(); + continue; + } + if (SUCCEEDED(MFCreateSourceReaderFromMediaSource(mSrc.Get(), srAttr.Get(), &videoFileSource))) { isOpen = true; @@ -958,10 +1056,113 @@ bool CvCapture_MSMF::open(const cv::String& _filename) return isOpen; } + +HRESULT SourceReaderCB::Wait(DWORD dwMilliseconds, _ComPtr& videoSample, BOOL& bEOS) +{ + bEOS = FALSE; + + DWORD dwResult = WaitForSingleObject(m_hEvent, dwMilliseconds); + if (dwResult == WAIT_TIMEOUT) + { + return E_PENDING; + } + else if (dwResult != WAIT_OBJECT_0) + { + return HRESULT_FROM_WIN32(GetLastError()); + } + + bEOS = m_bEOS; + if (!bEOS) + { + cv::AutoLock lock(m_mutex); + videoSample = m_lastSample; + CV_Assert(videoSample); + m_lastSample.Release(); + ResetEvent(m_hEvent); // event is auto-reset, but we need this forced reset due time gap between wait() and mutex hold. 
+ } + + return m_hrStatus; +} + +STDMETHODIMP SourceReaderCB::OnReadSample(HRESULT hrStatus, DWORD dwStreamIndex, DWORD dwStreamFlags, LONGLONG llTimestamp, IMFSample *pSample) +{ + CV_UNUSED(llTimestamp); + + HRESULT hr = 0; + cv::AutoLock lock(m_mutex); + + if (SUCCEEDED(hrStatus)) + { + if (pSample) + { + CV_LOG_DEBUG(NULL, "videoio(MSMF): got frame at " << llTimestamp); + IMFSample* prev = m_lastSample.Get(); + if (prev) + { + CV_LOG_DEBUG(NULL, "videoio(MSMF): drop frame (not processed)"); + } + m_lastSample = pSample; + } + } + else + { + CV_LOG_WARNING(NULL, "videoio(MSMF): OnReadSample() is called with error status: " << hrStatus); + } + + if (MF_SOURCE_READERF_ENDOFSTREAM & dwStreamFlags) + { + // Reached the end of the stream. + m_bEOS = true; + } + m_hrStatus = hrStatus; + + if (FAILED(hr = m_reader->ReadSample(dwStreamIndex, 0, NULL, NULL, NULL, NULL))) + { + CV_LOG_WARNING(NULL, "videoio(MSMF): async ReadSample() call is failed with error status: " << hr); + m_bEOS = true; + } + + if (pSample || m_bEOS) + { + SetEvent(m_hEvent); + } + return S_OK; +} + + bool CvCapture_MSMF::grabFrame() { CV_TRACE_FUNCTION(); - if (isOpen) + if (readCallback) // async "live" capture mode + { + HRESULT hr = 0; + SourceReaderCB* reader = ((SourceReaderCB*)readCallback.Get()); + if (!reader->m_reader) + { + // Initiate capturing with async callback + reader->m_reader = videoFileSource; + reader->m_dwStreamIndex = dwStreamIndex; + if (FAILED(hr = videoFileSource->ReadSample(dwStreamIndex, 0, NULL, NULL, NULL, NULL))) + { + CV_LOG_ERROR(NULL, "videoio(MSMF): can't grab frame - initial async ReadSample() call failed: " << hr); + reader->m_reader = NULL; + return false; + } + } + BOOL bEOS = false; + if (FAILED(hr = reader->Wait(10000, videoSample, bEOS))) // 10 sec + { + CV_LOG_WARNING(NULL, "videoio(MSMF): can't grab frame. Error: " << hr); + return false; + } + if (bEOS) + { + CV_LOG_WARNING(NULL, "videoio(MSMF): EOS signal. 
Capture stream is lost"); + return false; + } + return true; + } + else if (isOpen) { DWORD streamIndex, flags; videoSample.Release(); @@ -1712,17 +1913,25 @@ bool CvCapture_MSMF::setProperty( int property_id, double value ) cv::Ptr cv::cvCreateCapture_MSMF( int index ) { - cv::Ptr capture = cv::makePtr(index); - if (capture && capture->isOpened()) - return capture; + cv::Ptr capture = cv::makePtr(); + if (capture) + { + capture->open(index); + if (capture->isOpened()) + return capture; + } return cv::Ptr(); } cv::Ptr cv::cvCreateCapture_MSMF (const cv::String& filename) { - cv::Ptr capture = cv::makePtr(filename); - if (capture && capture->isOpened()) - return capture; + cv::Ptr capture = cv::makePtr(); + if (capture) + { + capture->open(filename); + if (capture->isOpened()) + return capture; + } return cv::Ptr(); } @@ -1736,8 +1945,6 @@ class CvVideoWriter_MSMF : public cv::IVideoWriter { public: CvVideoWriter_MSMF(); - CvVideoWriter_MSMF(const cv::String& filename, int fourcc, - double fps, cv::Size frameSize, bool isColor); virtual ~CvVideoWriter_MSMF(); virtual bool open(const cv::String& filename, int fourcc, double fps, cv::Size frameSize, bool isColor); @@ -1774,7 +1981,6 @@ CvVideoWriter_MSMF::CvVideoWriter_MSMF(): initiated(false) { } -CvVideoWriter_MSMF::CvVideoWriter_MSMF(const cv::String& filename, int fourcc, double fps, cv::Size frameSize, bool isColor) : CvVideoWriter_MSMF() { open(filename, fourcc, fps, frameSize, isColor); } CvVideoWriter_MSMF::~CvVideoWriter_MSMF() { @@ -1945,9 +2151,13 @@ void CvVideoWriter_MSMF::write(cv::InputArray img) cv::Ptr cv::cvCreateVideoWriter_MSMF( const cv::String& filename, int fourcc, double fps, cv::Size frameSize, int isColor ) { - cv::Ptr writer = cv::makePtr(filename, fourcc, fps, frameSize, isColor != 0); - if (writer && writer->isOpened()) - return writer; + cv::Ptr writer = cv::makePtr(); + if (writer) + { + writer->open(filename, fourcc, fps, frameSize, isColor != 0); + if (writer->isOpened()) + return 
writer; + } return cv::Ptr(); } diff --git a/samples/cpp/gstreamer_pipeline.cpp b/samples/cpp/gstreamer_pipeline.cpp index 0d467754b3..4ad1daa4c2 100644 --- a/samples/cpp/gstreamer_pipeline.cpp +++ b/samples/cpp/gstreamer_pipeline.cpp @@ -4,444 +4,373 @@ #include "opencv2/highgui.hpp" #include #include +#include using namespace std; using namespace cv; -class GStreamerPipeline +//================================================================================ + +template +inline typename M::mapped_type getValue(const M &dict, const typename M::key_type &key, const string & errorMessage) { - public: - // Preprocessing arguments command line - GStreamerPipeline(int argc, char *argv[]) + typename M::const_iterator it = dict.find(key); + if (it == dict.end()) { - const string keys = - "{h help usage ? | | print help messages }" - "{m mode | | coding mode (supported: encode, decode) }" - "{p pipeline |default | pipeline name (supported: 'default', 'gst-basic', 'gst-vaapi', 'gst-libav', 'ffmpeg') }" - "{cd codec |h264 | codec name (supported: 'h264', 'h265', 'mpeg2', 'mpeg4', 'mjpeg', 'vp8') }" - "{f file path | | path to file }" - "{vr resolution |720p | video resolution for encoding (supported: '720p', '1080p', '4k') }" - "{fps |30 | fix frame per second for encoding (supported: fps > 0) }" - "{fm fast | | fast measure fps }"; - cmd_parser = new CommandLineParser(argc, argv, keys); - cmd_parser->about("This program shows how to read a video file with GStreamer pipeline with OpenCV."); + CV_Error(Error::StsBadArg, errorMessage); + } + return it->second; +} - if (cmd_parser->has("help")) - { - cmd_parser->printMessage(); - CV_Error(Error::StsBadArg, "Called help."); - } +inline map sizeByResolution() +{ + map res; + res["720p"] = Size(1280, 720); + res["1080p"] = Size(1920, 1080); + res["4k"] = Size(3840, 2160); + return res; +} - fast_measure = cmd_parser->has("fast"); // fast measure fps - fix_fps = cmd_parser->get("fps"); // fixed frame per second - pipeline = 
cmd_parser->get("pipeline"), // gstreamer pipeline type - mode = cmd_parser->get("mode"), // coding mode - codec = cmd_parser->get("codec"), // codec type - file_name = cmd_parser->get("file"), // path to videofile - resolution = cmd_parser->get("resolution"); // video resolution +inline map fourccByCodec() +{ + map res; + res["h264"] = VideoWriter::fourcc('H','2','6','4'); + res["h265"] = VideoWriter::fourcc('H','E','V','C'); + res["mpeg2"] = VideoWriter::fourcc('M','P','E','G'); + res["mpeg4"] = VideoWriter::fourcc('M','P','4','2'); + res["mjpeg"] = VideoWriter::fourcc('M','J','P','G'); + res["vp8"] = VideoWriter::fourcc('V','P','8','0'); + return res; +} - size_t found = file_name.rfind("."); - if (found != string::npos) - { - container = file_name.substr(found + 1); // container type - } - else { CV_Error(Error::StsBadArg, "Can not parse container extension."); } +inline map defaultEncodeElementByCodec() +{ + map res; + res["h264"] = "x264enc"; + res["h265"] = "x265enc"; + res["mpeg2"] = "mpeg2enc"; + res["mjpeg"] = "jpegenc"; + res["vp8"] = "vp8enc"; + return res; +} - if (!cmd_parser->check()) - { - cmd_parser->printErrors(); - CV_Error(Error::StsBadArg, "Failed parse arguments."); - } - } +inline map VAAPIEncodeElementByCodec() +{ + map res; + res["h264"] = "parsebin ! vaapih264enc"; + res["h265"] = "parsebin ! vaapih265enc"; + res["mpeg2"] = "parsebin ! vaapimpeg2enc"; + res["mjpeg"] = "parsebin ! vaapijpegenc"; + res["vp8"] = "parsebin ! vaapivp8enc"; + return res; +} - ~GStreamerPipeline() { delete cmd_parser; } +inline map mfxDecodeElementByCodec() +{ + map res; + res["h264"] = "parsebin ! mfxh264dec"; + res["h265"] = "parsebin ! mfxhevcdec"; + res["mpeg2"] = "parsebin ! mfxmpeg2dec"; + res["mjpeg"] = "parsebin ! 
mfxjpegdec"; + return res; +} - // Start pipeline - int run() - { - if (mode == "decode") { if (createDecodePipeline() < 0) return -1; } - else if (mode == "encode") { if (createEncodePipeline() < 0) return -1; } - else - { - cout << "Unsupported mode: " << mode << endl; - cmd_parser->printErrors(); - return -1; - } - cout << "_____________________________________" << endl; - cout << "Pipeline " << mode << ":" << endl; - cout << stream_pipeline.str() << endl; - // Choose a show video or only measure fps - cout << "_____________________________________" << endl; - cout << "Start measure frame per seconds (fps)" << endl; - cout << "Loading ..." << endl; +inline map mfxEncodeElementByCodec() +{ + map res; + res["h264"] = "mfxh264enc"; + res["h265"] = "mfxhevcenc"; + res["mpeg2"] = "mfxmpeg2enc"; + res["mjpeg"] = "mfxjpegenc"; + return res; +} - vector tick_counts; +inline map libavDecodeElementByCodec() +{ + map res; + res["h264"] = "parsebin ! avdec_h264"; + res["h265"] = "parsebin ! avdec_h265"; + res["mpeg2"] = "parsebin ! avdec_mpeg2video"; + res["mpeg4"] = "parsebin ! avdec_mpeg4"; + res["mjpeg"] = "parsebin ! avdec_mjpeg"; + res["vp8"] = "parsebin ! 
avdec_vp8"; + return res; +} - cout << "Start " << mode << ": " << file_name; - cout << " (" << pipeline << ")" << endl; +inline map libavEncodeElementByCodec() +{ + map res; + res["h264"] = "avenc_h264"; + res["h265"] = "avenc_h265"; + res["mpeg2"] = "avenc_mpeg2video"; + res["mpeg4"] = "avenc_mpeg4"; + res["mjpeg"] = "avenc_mjpeg"; + res["vp8"] = "avenc_vp8"; + return res; +} - while(true) - { - int64 temp_count_tick = 0; - if (mode == "decode") - { - Mat frame; - temp_count_tick = getTickCount(); - cap >> frame; - temp_count_tick = getTickCount() - temp_count_tick; - if (frame.empty()) { break; } - } - else if (mode == "encode") - { - Mat element; - while(!cap.grab()); - cap.retrieve(element); - temp_count_tick = getTickCount(); - wrt << element; - temp_count_tick = getTickCount() - temp_count_tick; - } +inline map demuxPluginByContainer() +{ + map res; + res["avi"] = "avidemux"; + res["mp4"] = "qtdemux"; + res["mov"] = "qtdemux"; + res["mkv"] = "matroskademux"; + return res; +} - tick_counts.push_back(static_cast(temp_count_tick)); - if (((mode == "decode") && fast_measure && (tick_counts.size() > 1e3)) || - ((mode == "encode") && (tick_counts.size() > 3e3)) || - ((mode == "encode") && fast_measure && (tick_counts.size() > 1e2))) - { break; } +inline map muxPluginByContainer() +{ + map res; + res["avi"] = "avimux"; + res["mp4"] = "qtmux"; + res["mov"] = "qtmux"; + res["mkv"] = "matroskamux"; + return res; +} - } - double time_fps = sum(tick_counts)[0] / getTickFrequency(); +//================================================================================ - if (tick_counts.size() != 0) - { - cout << "Finished: " << tick_counts.size() << " in " << time_fps <<" sec ~ " ; - cout << tick_counts.size() / time_fps <<" fps " << endl; - } - else - { - cout << "Failed " << mode << ": " << file_name; - cout << " (" << pipeline << ")" << endl; - return -1; - } - return 0; +inline string containerByName(const string &name) +{ + size_t found = name.rfind("."); + if (found 
!= string::npos) + { + return name.substr(found + 1); // container type } + return string(); +} + +//================================================================================ - // Free video resource - void close() +inline Ptr createCapture(const string &backend, const string &file_name, const string &codec) +{ + if (backend == "gst-default") { - cap.release(); - wrt.release(); + cout << "Created GStreamer capture ( " << file_name << " )" << endl; + return makePtr(file_name, CAP_GSTREAMER); } - - private: - // Choose the constructed GStreamer pipeline for decode - int createDecodePipeline() + else if (backend.find("gst") == 0) { - if (pipeline == "default") { - cap = VideoCapture(file_name, CAP_GSTREAMER); - } - else if (pipeline.find("gst") == 0) - { - stream_pipeline << "filesrc location=\"" << file_name << "\""; - stream_pipeline << " ! " << getGstMuxPlugin(); - - if (pipeline.find("basic") == 4) - { - stream_pipeline << getGstDefaultCodePlugin(); - } - else if (pipeline.find("vaapi1710") == 4) - { - stream_pipeline << getGstVaapiCodePlugin(); - } - else if (pipeline.find("libav") == 4) - { - stream_pipeline << getGstAvCodePlugin(); - } - else - { - cout << "Unsupported pipeline: " << pipeline << endl; - cmd_parser->printErrors(); - return -1; - } - - stream_pipeline << " ! videoconvert n-threads=" << getNumThreads(); - stream_pipeline << " ! appsink sync=false"; - cap = VideoCapture(stream_pipeline.str(), CAP_GSTREAMER); - } - else if (pipeline == "ffmpeg") - { - cap = VideoCapture(file_name, CAP_FFMPEG); - stream_pipeline << "default pipeline for ffmpeg" << endl; - } + ostringstream line; + line << "filesrc location=\"" << file_name << "\""; + line << " ! "; + line << getValue(demuxPluginByContainer(), containerByName(file_name), "Invalid container"); + line << " ! 
"; + if (backend.find("basic") == 4) + line << "decodebin"; + else if (backend.find("vaapi") == 4) + line << "vaapidecodebin"; + else if (backend.find("libav") == 4) + line << getValue(libavDecodeElementByCodec(), codec, "Invalid codec"); + else if (backend.find("mfx") == 4) + line << getValue(mfxDecodeElementByCodec(), codec, "Invalid or unsupported codec"); else - { - cout << "Unsupported pipeline: " << pipeline << endl; - cmd_parser->printErrors(); - return -1; - } - return 0; + return Ptr(); + line << " ! videoconvert n-threads=" << getNumThreads(); + line << " ! appsink sync=false"; + cout << "Created GStreamer capture ( " << line.str() << " )" << endl; + return makePtr(line.str(), CAP_GSTREAMER); } - - // Choose the constructed GStreamer pipeline for encode - int createEncodePipeline() + else if (backend == "ffmpeg") { - if (checkConfiguration() < 0) return -1; - ostringstream test_pipeline; - test_pipeline << "videotestsrc pattern=smpte"; - test_pipeline << " ! video/x-raw, " << getVideoSettings(); - test_pipeline << " ! appsink sync=false"; - cap = VideoCapture(test_pipeline.str(), CAP_GSTREAMER); - - if (pipeline == "default") { - wrt = VideoWriter(file_name, CAP_GSTREAMER, getFourccCode(), fix_fps, fix_size, true); - } - else if (pipeline.find("gst") == 0) - { - stream_pipeline << "appsrc ! videoconvert n-threads=" << getNumThreads() << " ! 
"; + cout << "Created FFmpeg capture ( " << file_name << " )" << endl; + return makePtr(file_name, CAP_FFMPEG); + } + return Ptr(); +} - if (pipeline.find("basic") == 4) - { - stream_pipeline << getGstDefaultCodePlugin(); - } - else if (pipeline.find("vaapi1710") == 4) - { - stream_pipeline << getGstVaapiCodePlugin(); - } - else if (pipeline.find("libav") == 4) - { - stream_pipeline << getGstAvCodePlugin(); - } - else - { - cout << "Unsupported pipeline: " << pipeline << endl; - cmd_parser->printErrors(); - return -1; - } +inline Ptr createSynthSource(Size sz, unsigned fps) +{ + ostringstream line; + line << "videotestsrc pattern=smpte"; + line << " ! video/x-raw"; + line << ",width=" << sz.width << ",height=" << sz.height; + if (fps > 0) + line << ",framerate=" << fps << "/1"; + line << " ! appsink sync=false"; + cout << "Created synthetic video source ( " << line.str() << " )" << endl; + return makePtr(line.str(), CAP_GSTREAMER); +} - stream_pipeline << " ! " << getGstMuxPlugin(); - stream_pipeline << " ! 
filesink location=\"" << file_name << "\""; - wrt = VideoWriter(stream_pipeline.str(), CAP_GSTREAMER, 0, fix_fps, fix_size, true); - } - else if (pipeline == "ffmpeg") - { - wrt = VideoWriter(file_name, CAP_FFMPEG, getFourccCode(), fix_fps, fix_size, true); - stream_pipeline << "default pipeline for ffmpeg" << endl; - } - else - { - cout << "Unsupported pipeline: " << pipeline << endl; - cmd_parser->printErrors(); - return -1; - } - return 0; +inline Ptr createWriter(const string &backend, const string &file_name, const string &codec, Size sz, unsigned fps) +{ + if (backend == "gst-default") + { + cout << "Created GStreamer writer ( " << file_name << ", FPS=" << fps << ", Size=" << sz << ")" << endl; + return makePtr(file_name, CAP_GSTREAMER, getValue(fourccByCodec(), codec, "Invalid codec"), fps, sz, true); } - - // Choose video resolution for encoding - string getVideoSettings() + else if (backend.find("gst") == 0) { - ostringstream video_size; - if (fix_fps > 0) { video_size << "framerate=" << fix_fps << "/1, "; } - else - { - cout << "Unsupported fps (< 0): " << fix_fps << endl; - cmd_parser->printErrors(); - return string(); - } - - if (resolution == "720p") { fix_size = Size(1280, 720); } - else if (resolution == "1080p") { fix_size = Size(1920, 1080); } - else if (resolution == "4k") { fix_size = Size(3840, 2160); } + ostringstream line; + line << "appsrc ! videoconvert n-threads=" << getNumThreads() << " ! 
"; + if (backend.find("basic") == 4) + line << getValue(defaultEncodeElementByCodec(), codec, "Invalid codec"); + else if (backend.find("vaapi") == 4) + line << getValue(VAAPIEncodeElementByCodec(), codec, "Invalid codec"); + else if (backend.find("libav") == 4) + line << getValue(libavEncodeElementByCodec(), codec, "Invalid codec"); + else if (backend.find("mfx") == 4) + line << getValue(mfxEncodeElementByCodec(), codec, "Invalid codec"); else - { - cout << "Unsupported video resolution: " << resolution << endl; - cmd_parser->printErrors(); - return string(); - } - - video_size << "width=" << fix_size.width << ", height=" << fix_size.height; - return video_size.str(); + return Ptr(); + line << " ! "; + line << getValue(muxPluginByContainer(), containerByName(file_name), "Invalid container"); + line << " ! "; + line << "filesink location=\"" << file_name << "\""; + cout << "Created GStreamer writer ( " << line.str() << " )" << endl; + return makePtr(line.str(), CAP_GSTREAMER, 0, fps, sz, true); } - - // Choose a video container - string getGstMuxPlugin() + else if (backend == "ffmpeg") { - ostringstream plugin; - if (container == "avi") { plugin << "avi"; } - else if (container == "mp4") { plugin << "qt"; } - else if (container == "mov") { plugin << "qt"; } - else if (container == "mkv") { plugin << "matroska"; } - else - { - cout << "Unsupported container: " << container << endl; - cmd_parser->printErrors(); - return string(); - } + cout << "Created FFMpeg writer ( " << file_name << ", FPS=" << fps << ", Size=" << sz << " )" << endl; + return makePtr(file_name, CAP_FFMPEG, getValue(fourccByCodec(), codec, "Invalid codec"), fps, sz, true); + } + return Ptr(); +} - if (mode == "decode") { plugin << "demux"; } - else if (mode == "encode") { plugin << "mux"; } - else - { - cout << "Unsupported mode: " << mode << endl; - cmd_parser->printErrors(); - return string(); - } +//================================================================================ - return 
plugin.str(); +int main(int argc, char *argv[]) +{ + const string keys = + "{h help usage ? | | print help messages }" + "{m mode |decode | coding mode (supported: encode, decode) }" + "{b backend |default | video backend (supported: 'gst-default', 'gst-basic', 'gst-vaapi', 'gst-libav', 'gst-mfx', 'ffmpeg') }" + "{c codec |h264 | codec name (supported: 'h264', 'h265', 'mpeg2', 'mpeg4', 'mjpeg', 'vp8') }" + "{f file path | | path to file }" + "{r resolution |720p | video resolution for encoding (supported: '720p', '1080p', '4k') }" + "{fps |30 | fix frame per second for encoding (supported: fps > 0) }" + "{fast | | fast measure fps }"; + CommandLineParser cmd_parser(argc, argv, keys); + cmd_parser.about("This program measures performance of video encoding and decoding using different backends OpenCV."); + if (cmd_parser.has("help")) + { + cmd_parser.printMessage(); + return 0; } - - // Choose a libav codec - string getGstAvCodePlugin() + bool fast_measure = cmd_parser.has("fast"); // fast measure fps + unsigned fix_fps = cmd_parser.get("fps"); // fixed frame per second + string backend = cmd_parser.get("backend"); // video backend + string mode = cmd_parser.get("mode"); // coding mode + string codec = cmd_parser.get("codec"); // codec type + string file_name = cmd_parser.get("file"); // path to videofile + string resolution = cmd_parser.get("resolution"); // video resolution + if (!cmd_parser.check()) { - ostringstream plugin; - if (mode == "decode") - { - if (codec == "h264") { plugin << "h264parse ! "; } - else if (codec == "h265") { plugin << "h265parse ! 
"; } - plugin << "avdec_"; - } - else if (mode == "encode") { plugin << "avenc_"; } - else - { - cout << "Unsupported mode: " << mode << endl; - cmd_parser->printErrors(); - return string(); - } - - if (codec == "h264") { plugin << "h264"; } - else if (codec == "h265") { plugin << "h265"; } - else if (codec == "mpeg2") { plugin << "mpeg2video"; } - else if (codec == "mpeg4") { plugin << "mpeg4"; } - else if (codec == "mjpeg") { plugin << "mjpeg"; } - else if (codec == "vp8") { plugin << "vp8"; } - else - { - cout << "Unsupported libav codec: " << codec << endl; - cmd_parser->printErrors(); - return string(); - } - - return plugin.str(); + cmd_parser.printErrors(); + return -1; } + if (mode != "encode" && mode != "decode") + { + cout << "Unsupported mode: " << mode << endl; + return -1; + } + cout << "Mode: " << mode << ", Backend: " << backend << ", File: " << file_name << ", Codec: " << codec << endl; - // Choose a vaapi codec - string getGstVaapiCodePlugin() + TickMeter total; + Ptr cap; + Ptr wrt; + try { - ostringstream plugin; if (mode == "decode") { - plugin << "vaapidecodebin"; - if (container == "mkv") { plugin << " ! autovideoconvert"; } - else { plugin << " ! 
video/x-raw, format=YV12"; } + cap = createCapture(backend, file_name, codec); + if (!cap) + { + cout << "Failed to create video capture" << endl; + return -3; + } + if (!cap->isOpened()) + { + cout << "Capture is not opened" << endl; + return -4; + } } else if (mode == "encode") { - if (codec == "h264") { plugin << "vaapih264enc"; } - else if (codec == "h265") { plugin << "vaapih265enc"; } - else if (codec == "mpeg2") { plugin << "vaapimpeg2enc"; } - else if (codec == "mjpeg") { plugin << "vaapijpegenc"; } - else if (codec == "vp8") { plugin << "vaapivp8enc"; } - else + Size sz = getValue(sizeByResolution(), resolution, "Invalid resolution"); + cout << "FPS: " << fix_fps << ", Frame size: " << sz << endl; + cap = createSynthSource(sz, fix_fps); + wrt = createWriter(backend, file_name, codec, sz, fix_fps); + if (!cap || !wrt) { - cout << "Unsupported vaapi codec: " << codec << endl; - cmd_parser->printErrors(); - return string(); + cout << "Failed to create synthetic video source or video writer" << endl; + return -3; + } + if (!cap->isOpened() || !wrt->isOpened()) + { + cout << "Synthetic video source or video writer is not opened" << endl; + return -4; } } - else - { - cout << "Unsupported mode: " << resolution << endl; - cmd_parser->printErrors(); - return string(); - } - return plugin.str(); + } + catch (...) + { + cout << "Unsupported parameters" << endl; + return -2; } - // Choose a default codec - string getGstDefaultCodePlugin() + TickMeter tick; + Mat frame; + Mat element; + total.start(); + while(true) { - ostringstream plugin; if (mode == "decode") { - plugin << " ! 
decodebin"; + tick.start(); + if (!cap->grab()) + { + cout << "No more frames - break" << endl; + break; + } + if (!cap->retrieve(frame)) + { + cout << "Failed to retrieve frame - break" << endl; + break; + } + if (frame.empty()) + { + cout << "Empty frame received - break" << endl; + break; + } + tick.stop(); } else if (mode == "encode") { - if (codec == "h264") { plugin << "x264enc"; } - else if (codec == "h265") { plugin << "x265enc"; } - else if (codec == "mpeg2") { plugin << "mpeg2enc"; } - else if (codec == "mjpeg") { plugin << "jpegenc"; } - else if (codec == "vp8") { plugin << "vp8enc"; } - else + int limit = 100; + while (!cap->grab() && --limit != 0) { - cout << "Unsupported default codec: " << codec << endl; - cmd_parser->printErrors(); - return string(); + cout << "Skipping empty input frame - " << limit << endl; } + cap->retrieve(element); + tick.start(); + *wrt << element; + tick.stop(); } - else - { - cout << "Unsupported mode: " << resolution << endl; - cmd_parser->printErrors(); - return string(); - } - return plugin.str(); - } - // Get fourcc for codec - int getFourccCode() - { - if (codec == "h264") { return VideoWriter::fourcc('H','2','6','4'); } - else if (codec == "h265") { return VideoWriter::fourcc('H','E','V','C'); } - else if (codec == "mpeg2") { return VideoWriter::fourcc('M','P','E','G'); } - else if (codec == "mpeg4") { return VideoWriter::fourcc('M','P','4','2'); } - else if (codec == "mjpeg") { return VideoWriter::fourcc('M','J','P','G'); } - else if (codec == "vp8") { return VideoWriter::fourcc('V','P','8','0'); } - else + + if (fast_measure && tick.getCounter() >= 1000) { - cout << "Unsupported ffmpeg codec: " << codec << endl; - cmd_parser->printErrors(); - return 0; + cout << "Fast mode frame limit reached - break" << endl; + break; } - } - - // Check bad configuration - int checkConfiguration() - { - if ((codec == "mpeg2" && getGstMuxPlugin() == "qtmux") || - (codec == "h265" && getGstMuxPlugin() == "avimux") || - (pipeline == 
"gst-libav" && (codec == "h264" || codec == "h265")) || - (pipeline == "gst-vaapi1710" && codec=="mpeg2" && resolution=="4k") || - (pipeline == "gst-vaapi1710" && codec=="mpeg2" && resolution=="1080p" && fix_fps > 30)) + if (mode == "encode" && tick.getCounter() >= 1000) { - cout << "Unsupported configuration" << endl; - cmd_parser->printErrors(); - return -1; + cout << "Encode frame limit reached - break" << endl; + break; } - return 0; } - - bool fast_measure; // fast measure fps - string pipeline, // gstreamer pipeline type - container, // container type - mode, // coding mode - codec, // codec type - file_name, // path to videofile - resolution; // video resolution - int fix_fps; // fixed frame per second - Size fix_size; // fixed frame size - VideoWriter wrt; - VideoCapture cap; - ostringstream stream_pipeline; - CommandLineParser* cmd_parser; -}; - -int main(int argc, char *argv[]) -{ - try + total.stop(); + if (tick.getCounter() == 0) { - GStreamerPipeline pipe(argc, argv); - return pipe.run(); + cout << "No frames have been processed" << endl; + return -10; } - catch(const Exception& e) + else { - cerr << e.what() << endl; - return 1; + double res_fps = tick.getCounter() / tick.getTimeSec(); + cout << tick.getCounter() << " frames in " << tick.getTimeSec() << " sec ~ " << res_fps << " FPS" << " (total time: " << total.getTimeSec() << " sec)" << endl; } + return 0; } diff --git a/samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp b/samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp index 87a5436a6d..d038cbd874 100644 --- a/samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp +++ b/samples/cpp/tutorial_code/ImgTrans/imageSegmentation.cpp @@ -1,5 +1,4 @@ /** - * @function Watershed_and_Distance_Transform.cpp * @brief Sample code showing how to segment overlapping objects using Laplacian filtering, in addition to Watershed and Distance Transformation * @author OpenCV Team */ @@ -12,43 +11,47 @@ using namespace std; using namespace cv; -int 
main() +int main(int argc, char *argv[]) { -//! [load_image] + //! [load_image] // Load the image - Mat src = imread("../data/cards.png"); - - // Check if everything was fine - if (!src.data) + CommandLineParser parser( argc, argv, "{@input | ../data/cards.png | input image}" ); + Mat src = imread( parser.get( "@input" ) ); + if( src.empty() ) + { + cout << "Could not open or find the image!\n" << endl; + cout << "Usage: " << argv[0] << " " << endl; return -1; + } // Show source image imshow("Source Image", src); -//! [load_image] + //! [load_image] -//! [black_bg] + //! [black_bg] // Change the background from white to black, since that will help later to extract // better results during the use of Distance Transform - for( int x = 0; x < src.rows; x++ ) { - for( int y = 0; y < src.cols; y++ ) { - if ( src.at(x, y) == Vec3b(255,255,255) ) { - src.at(x, y)[0] = 0; - src.at(x, y)[1] = 0; - src.at(x, y)[2] = 0; - } + for ( int i = 0; i < src.rows; i++ ) { + for ( int j = 0; j < src.cols; j++ ) { + if ( src.at(i, j) == Vec3b(255,255,255) ) + { + src.at(i, j)[0] = 0; + src.at(i, j)[1] = 0; + src.at(i, j)[2] = 0; + } } } // Show output image imshow("Black Background Image", src); -//! [black_bg] + //! [black_bg] -//! [sharp] - // Create a kernel that we will use for accuting/sharpening our image + //! 
[sharp] + // Create a kernel that we will use to sharpen our image Mat kernel = (Mat_(3,3) << - 1, 1, 1, - 1, -8, 1, - 1, 1, 1); // an approximation of second derivative, a quite strong kernel + 1, 1, 1, + 1, -8, 1, + 1, 1, 1); // an approximation of second derivative, a quite strong kernel // do the laplacian filtering as it is // well, we need to convert everything in something more deeper then CV_8U @@ -57,8 +60,8 @@ int main() // BUT a 8bits unsigned int (the one we are working with) can contain values from 0 to 255 // so the possible negative number will be truncated Mat imgLaplacian; - Mat sharp = src; // copy source image to another temporary one - filter2D(sharp, imgLaplacian, CV_32F, kernel); + filter2D(src, imgLaplacian, CV_32F, kernel); + Mat sharp; src.convertTo(sharp, CV_32F); Mat imgResult = sharp - imgLaplacian; @@ -68,41 +71,39 @@ int main() // imshow( "Laplace Filtered Image", imgLaplacian ); imshow( "New Sharped Image", imgResult ); -//! [sharp] + //! [sharp] - src = imgResult; // copy back - -//! [bin] + //! [bin] // Create binary image from source image Mat bw; - cvtColor(src, bw, COLOR_BGR2GRAY); + cvtColor(imgResult, bw, COLOR_BGR2GRAY); threshold(bw, bw, 40, 255, THRESH_BINARY | THRESH_OTSU); imshow("Binary Image", bw); -//! [bin] + //! [bin] -//! [dist] + //! [dist] // Perform the distance transform algorithm Mat dist; distanceTransform(bw, dist, DIST_L2, 3); // Normalize the distance image for range = {0.0, 1.0} // so we can visualize and threshold it - normalize(dist, dist, 0, 1., NORM_MINMAX); + normalize(dist, dist, 0, 1.0, NORM_MINMAX); imshow("Distance Transform Image", dist); -//! [dist] + //! [dist] -//! [peaks] + //! 
[peaks] // Threshold to obtain the peaks // This will be the markers for the foreground objects - threshold(dist, dist, .4, 1., THRESH_BINARY); + threshold(dist, dist, 0.4, 1.0, THRESH_BINARY); // Dilate a bit the dist image - Mat kernel1 = Mat::ones(3, 3, CV_8UC1); + Mat kernel1 = Mat::ones(3, 3, CV_8U); dilate(dist, dist, kernel1); imshow("Peaks", dist); -//! [peaks] + //! [peaks] -//! [seeds] + //! [seeds] // Create the CV_8U version of the distance image // It is needed for findContours() Mat dist_8u; @@ -113,34 +114,36 @@ int main() findContours(dist_8u, contours, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE); // Create the marker image for the watershed algorithm - Mat markers = Mat::zeros(dist.size(), CV_32SC1); + Mat markers = Mat::zeros(dist.size(), CV_32S); // Draw the foreground markers for (size_t i = 0; i < contours.size(); i++) - drawContours(markers, contours, static_cast(i), Scalar::all(static_cast(i)+1), -1); + { + drawContours(markers, contours, static_cast(i), Scalar(static_cast(i)+1), -1); + } // Draw the background marker - circle(markers, Point(5,5), 3, CV_RGB(255,255,255), -1); + circle(markers, Point(5,5), 3, Scalar(255), -1); imshow("Markers", markers*10000); -//! [seeds] + //! [seeds] -//! [watershed] + //! 
[watershed] // Perform the watershed algorithm - watershed(src, markers); + watershed(imgResult, markers); - Mat mark = Mat::zeros(markers.size(), CV_8UC1); - markers.convertTo(mark, CV_8UC1); + Mat mark; + markers.convertTo(mark, CV_8U); bitwise_not(mark, mark); -// imshow("Markers_v2", mark); // uncomment this if you want to see how the mark - // image looks like at that point + // imshow("Markers_v2", mark); // uncomment this if you want to see how the mark + // image looks like at that point // Generate random colors vector colors; for (size_t i = 0; i < contours.size(); i++) { - int b = theRNG().uniform(0, 255); - int g = theRNG().uniform(0, 255); - int r = theRNG().uniform(0, 255); + int b = theRNG().uniform(0, 256); + int g = theRNG().uniform(0, 256); + int r = theRNG().uniform(0, 256); colors.push_back(Vec3b((uchar)b, (uchar)g, (uchar)r)); } @@ -155,16 +158,16 @@ int main() { int index = markers.at(i,j); if (index > 0 && index <= static_cast(contours.size())) + { dst.at(i,j) = colors[index-1]; - else - dst.at(i,j) = Vec3b(0,0,0); + } } } // Visualize the final image imshow("Final Result", dst); -//! [watershed] + //! [watershed] - waitKey(0); + waitKey(); return 0; } diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp index 084d41bb5f..922bdcc9a0 100644 --- a/samples/dnn/object_detection.cpp +++ b/samples/dnn/object_detection.cpp @@ -22,6 +22,7 @@ const char* keys = "{ height | -1 | Preprocess input image by resizing to a specific height. }" "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" "{ thr | .5 | Confidence threshold. }" + "{ thr | .4 | Non-maximum suppression threshold. 
}" "{ backend | 0 | Choose one of computation backends: " "0: automatically (by default), " "1: Halide language (http://halide-lang.org/), " @@ -37,7 +38,7 @@ const char* keys = using namespace cv; using namespace dnn; -float confThreshold; +float confThreshold, nmsThreshold; std::vector classes; void postprocess(Mat& frame, const std::vector& out, Net& net); @@ -59,6 +60,7 @@ int main(int argc, char** argv) } confThreshold = parser.get("thr"); + nmsThreshold = parser.get("nms"); float scale = parser.get("scale"); Scalar mean = parser.get("mean"); bool swapRB = parser.get("rgb"); @@ -144,6 +146,9 @@ void postprocess(Mat& frame, const std::vector& outs, Net& net) static std::vector outLayers = net.getUnconnectedOutLayers(); static std::string outLayerType = net.getLayer(outLayers[0])->type; + std::vector classIds; + std::vector confidences; + std::vector boxes; if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN { // Network produces output blob with a shape 1x1xNx7 where N is a number of @@ -160,8 +165,11 @@ void postprocess(Mat& frame, const std::vector& outs, Net& net) int top = (int)data[i + 4]; int right = (int)data[i + 5]; int bottom = (int)data[i + 6]; - int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id. - drawPred(classId, confidence, left, top, right, bottom, frame); + int width = right - left + 1; + int height = bottom - top + 1; + classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. + boxes.push_back(Rect(left, top, width, height)); + confidences.push_back(confidence); } } } @@ -181,16 +189,16 @@ void postprocess(Mat& frame, const std::vector& outs, Net& net) int top = (int)(data[i + 4] * frame.rows); int right = (int)(data[i + 5] * frame.cols); int bottom = (int)(data[i + 6] * frame.rows); - int classId = (int)(data[i + 1]) - 1; // Skip 0th background class id. 
- drawPred(classId, confidence, left, top, right, bottom, frame); + int width = right - left + 1; + int height = bottom - top + 1; + classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. + boxes.push_back(Rect(left, top, width, height)); + confidences.push_back(confidence); } } } else if (outLayerType == "Region") { - std::vector classIds; - std::vector confidences; - std::vector boxes; for (size_t i = 0; i < outs.size(); ++i) { // Network produces output blob with a shape NxC where N is a number of @@ -218,18 +226,19 @@ void postprocess(Mat& frame, const std::vector& outs, Net& net) } } } - std::vector indices; - NMSBoxes(boxes, confidences, confThreshold, 0.4f, indices); - for (size_t i = 0; i < indices.size(); ++i) - { - int idx = indices[i]; - Rect box = boxes[idx]; - drawPred(classIds[idx], confidences[idx], box.x, box.y, - box.x + box.width, box.y + box.height, frame); - } } else CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType); + + std::vector indices; + NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); + for (size_t i = 0; i < indices.size(); ++i) + { + int idx = indices[i]; + Rect box = boxes[idx]; + drawPred(classIds[idx], confidences[idx], box.x, box.y, + box.x + box.width, box.y + box.height, frame); + } } void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index b191cd4925..386e02890d 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -31,6 +31,7 @@ parser.add_argument('--height', type=int, parser.add_argument('--rgb', action='store_true', help='Indicate that model works with RGB input images instead BGR ones.') parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') +parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold') parser.add_argument('--backend', 
choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, help="Choose one of computation backends: " "%d: automatically (by default), " @@ -57,6 +58,7 @@ net.setPreferableBackend(args.backend) net.setPreferableTarget(args.target) confThreshold = args.thr +nmsThreshold = args.nms def getOutputsNames(net): layersNames = net.getLayerNames() @@ -86,36 +88,43 @@ def postprocess(frame, outs): lastLayerId = net.getLayerId(layerNames[-1]) lastLayer = net.getLayer(lastLayerId) + classIds = [] + confidences = [] + boxes = [] if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN # Network produces output blob with a shape 1x1xNx7 where N is a number of # detections and an every detection is a vector of values # [batchId, classId, confidence, left, top, right, bottom] - assert(len(outs) == 1) - out = outs[0] - for detection in out[0, 0]: - confidence = detection[2] - if confidence > confThreshold: - left = int(detection[3]) - top = int(detection[4]) - right = int(detection[5]) - bottom = int(detection[6]) - classId = int(detection[1]) - 1 # Skip background label - drawPred(classId, confidence, left, top, right, bottom) + for out in outs: + for detection in out[0, 0]: + confidence = detection[2] + if confidence > confThreshold: + left = int(detection[3]) + top = int(detection[4]) + right = int(detection[5]) + bottom = int(detection[6]) + width = right - left + 1 + height = bottom - top + 1 + classIds.append(int(detection[1]) - 1) # Skip background label + confidences.append(float(confidence)) + boxes.append([left, top, width, height]) elif lastLayer.type == 'DetectionOutput': # Network produces output blob with a shape 1x1xNx7 where N is a number of # detections and an every detection is a vector of values # [batchId, classId, confidence, left, top, right, bottom] - assert(len(outs) == 1) - out = outs[0] - for detection in out[0, 0]: - confidence = detection[2] - if confidence > confThreshold: - left = int(detection[3] * frameWidth) - top = 
int(detection[4] * frameHeight) - right = int(detection[5] * frameWidth) - bottom = int(detection[6] * frameHeight) - classId = int(detection[1]) - 1 # Skip background label - drawPred(classId, confidence, left, top, right, bottom) + for out in outs: + for detection in out[0, 0]: + confidence = detection[2] + if confidence > confThreshold: + left = int(detection[3] * frameWidth) + top = int(detection[4] * frameHeight) + right = int(detection[5] * frameWidth) + bottom = int(detection[6] * frameHeight) + width = right - left + 1 + height = bottom - top + 1 + classIds.append(int(detection[1]) - 1) # Skip background label + confidences.append(float(confidence)) + boxes.append([left, top, width, height]) elif lastLayer.type == 'Region': # Network produces output blob with a shape NxC where N is a number of # detected objects and C is a number of classes + 4 where the first 4 @@ -138,15 +147,19 @@ def postprocess(frame, outs): classIds.append(classId) confidences.append(float(confidence)) boxes.append([left, top, width, height]) - indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, 0.4) - for i in indices: - i = i[0] - box = boxes[i] - left = box[0] - top = box[1] - width = box[2] - height = box[3] - drawPred(classIds[i], confidences[i], left, top, left + width, top + height) + else: + print('Unknown output layer type: ' + lastLayer.type) + exit() + + indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold) + for i in indices: + i = i[0] + box = boxes[i] + left = box[0] + top = box[1] + width = box[2] + height = box[3] + drawPred(classIds[i], confidences[i], left, top, left + width, top + height) # Process inputs winName = 'Deep learning object detection in OpenCV' diff --git a/samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java b/samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java new file mode 100644 index 0000000000..1a26092f64 --- /dev/null +++ 
b/samples/java/tutorial_code/ImgTrans/distance_transformation/ImageSegmentationDemo.java @@ -0,0 +1,215 @@ +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.opencv.core.Core; +import org.opencv.core.CvType; +import org.opencv.core.Mat; +import org.opencv.core.MatOfPoint; +import org.opencv.core.Point; +import org.opencv.core.Scalar; +import org.opencv.highgui.HighGui; +import org.opencv.imgcodecs.Imgcodecs; +import org.opencv.imgproc.Imgproc; + +/** + * + * @brief Sample code showing how to segment overlapping objects using Laplacian filtering, in addition to Watershed + * and Distance Transformation + * + */ +class ImageSegmentation { + public void run(String[] args) { + //! [load_image] + // Load the image + String filename = args.length > 0 ? args[0] : "../data/cards.png"; + Mat srcOriginal = Imgcodecs.imread(filename); + if (srcOriginal.empty()) { + System.err.println("Cannot read image: " + filename); + System.exit(0); + } + + // Show source image + HighGui.imshow("Source Image", srcOriginal); + //! [load_image] + + //! [black_bg] + // Change the background from white to black, since that will help later to + // extract + // better results during the use of Distance Transform + Mat src = srcOriginal.clone(); + byte[] srcData = new byte[(int) (src.total() * src.channels())]; + src.get(0, 0, srcData); + for (int i = 0; i < src.rows(); i++) { + for (int j = 0; j < src.cols(); j++) { + if (srcData[(i * src.cols() + j) * 3] == (byte) 255 && srcData[(i * src.cols() + j) * 3 + 1] == (byte) 255 + && srcData[(i * src.cols() + j) * 3 + 2] == (byte) 255) { + srcData[(i * src.cols() + j) * 3] = 0; + srcData[(i * src.cols() + j) * 3 + 1] = 0; + srcData[(i * src.cols() + j) * 3 + 2] = 0; + } + } + } + src.put(0, 0, srcData); + + // Show output image + HighGui.imshow("Black Background Image", src); + //! [black_bg] + + //! 
[sharp] + // Create a kernel that we will use to sharpen our image + Mat kernel = new Mat(3, 3, CvType.CV_32F); + // an approximation of second derivative, a quite strong kernel + float[] kernelData = new float[(int) (kernel.total() * kernel.channels())]; + kernelData[0] = 1; kernelData[1] = 1; kernelData[2] = 1; + kernelData[3] = 1; kernelData[4] = -8; kernelData[5] = 1; + kernelData[6] = 1; kernelData[7] = 1; kernelData[8] = 1; + kernel.put(0, 0, kernelData); + + // do the laplacian filtering as it is + // well, we need to convert everything in something more deeper then CV_8U + // because the kernel has some negative values, + // and we can expect in general to have a Laplacian image with negative values + // BUT a 8bits unsigned int (the one we are working with) can contain values + // from 0 to 255 + // so the possible negative number will be truncated + Mat imgLaplacian = new Mat(); + Imgproc.filter2D(src, imgLaplacian, CvType.CV_32F, kernel); + Mat sharp = new Mat(); + src.convertTo(sharp, CvType.CV_32F); + Mat imgResult = new Mat(); + Core.subtract(sharp, imgLaplacian, imgResult); + + // convert back to 8bits gray scale + imgResult.convertTo(imgResult, CvType.CV_8UC3); + imgLaplacian.convertTo(imgLaplacian, CvType.CV_8UC3); + + // imshow( "Laplace Filtered Image", imgLaplacian ); + HighGui.imshow("New Sharped Image", imgResult); + //! [sharp] + + //! [bin] + // Create binary image from source image + Mat bw = new Mat(); + Imgproc.cvtColor(imgResult, bw, Imgproc.COLOR_BGR2GRAY); + Imgproc.threshold(bw, bw, 40, 255, Imgproc.THRESH_BINARY | Imgproc.THRESH_OTSU); + HighGui.imshow("Binary Image", bw); + //! [bin] + + //! 
[dist] + // Perform the distance transform algorithm + Mat dist = new Mat(); + Imgproc.distanceTransform(bw, dist, Imgproc.DIST_L2, 3); + + // Normalize the distance image for range = {0.0, 1.0} + // so we can visualize and threshold it + Core.normalize(dist, dist, 0, 1., Core.NORM_MINMAX); + Mat distDisplayScaled = dist.mul(dist, 255); + Mat distDisplay = new Mat(); + distDisplayScaled.convertTo(distDisplay, CvType.CV_8U); + HighGui.imshow("Distance Transform Image", distDisplay); + //! [dist] + + //! [peaks] + // Threshold to obtain the peaks + // This will be the markers for the foreground objects + Imgproc.threshold(dist, dist, .4, 1., Imgproc.THRESH_BINARY); + + // Dilate a bit the dist image + Mat kernel1 = Mat.ones(3, 3, CvType.CV_8U); + Imgproc.dilate(dist, dist, kernel1); + Mat distDisplay2 = new Mat(); + dist.convertTo(distDisplay2, CvType.CV_8U); + distDisplay2 = distDisplay2.mul(distDisplay2, 255); + HighGui.imshow("Peaks", distDisplay2); + //! [peaks] + + //! [seeds] + // Create the CV_8U version of the distance image + // It is needed for findContours() + Mat dist_8u = new Mat(); + dist.convertTo(dist_8u, CvType.CV_8U); + + // Find total markers + List contours = new ArrayList<>(); + Mat hierarchy = new Mat(); + Imgproc.findContours(dist_8u, contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE); + + // Create the marker image for the watershed algorithm + Mat markers = Mat.zeros(dist.size(), CvType.CV_32S); + + // Draw the foreground markers + for (int i = 0; i < contours.size(); i++) { + Imgproc.drawContours(markers, contours, i, new Scalar(i + 1), -1); + } + + // Draw the background marker + Imgproc.circle(markers, new Point(5, 5), 3, new Scalar(255, 255, 255), -1); + Mat markersScaled = markers.mul(markers, 10000); + Mat markersDisplay = new Mat(); + markersScaled.convertTo(markersDisplay, CvType.CV_8U); + HighGui.imshow("Markers", markersDisplay); + //! [seeds] + + //! 
[watershed] + // Perform the watershed algorithm + Imgproc.watershed(imgResult, markers); + + Mat mark = Mat.zeros(markers.size(), CvType.CV_8U); + markers.convertTo(mark, CvType.CV_8UC1); + Core.bitwise_not(mark, mark); + // imshow("Markers_v2", mark); // uncomment this if you want to see how the mark + // image looks like at that point + + // Generate random colors + Random rng = new Random(12345); + List colors = new ArrayList<>(contours.size()); + for (int i = 0; i < contours.size(); i++) { + int b = rng.nextInt(256); + int g = rng.nextInt(256); + int r = rng.nextInt(256); + + colors.add(new Scalar(b, g, r)); + } + + // Create the result image + Mat dst = Mat.zeros(markers.size(), CvType.CV_8UC3); + byte[] dstData = new byte[(int) (dst.total() * dst.channels())]; + dst.get(0, 0, dstData); + + // Fill labeled objects with random colors + int[] markersData = new int[(int) (markers.total() * markers.channels())]; + markers.get(0, 0, markersData); + for (int i = 0; i < markers.rows(); i++) { + for (int j = 0; j < markers.cols(); j++) { + int index = markersData[i * markers.cols() + j]; + if (index > 0 && index <= contours.size()) { + dstData[(i * dst.cols() + j) * 3 + 0] = (byte) colors.get(index - 1).val[0]; + dstData[(i * dst.cols() + j) * 3 + 1] = (byte) colors.get(index - 1).val[1]; + dstData[(i * dst.cols() + j) * 3 + 2] = (byte) colors.get(index - 1).val[2]; + } else { + dstData[(i * dst.cols() + j) * 3 + 0] = 0; + dstData[(i * dst.cols() + j) * 3 + 1] = 0; + dstData[(i * dst.cols() + j) * 3 + 2] = 0; + } + } + } + dst.put(0, 0, dstData); + + // Visualize the final image + HighGui.imshow("Final Result", dst); + //! 
[watershed] + + HighGui.waitKey(); + System.exit(0); + } +} + +public class ImageSegmentationDemo { + public static void main(String[] args) { + // Load the native OpenCV library + System.loadLibrary(Core.NATIVE_LIBRARY_NAME); + + new ImageSegmentation().run(args); + } +} diff --git a/samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py b/samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py new file mode 100644 index 0000000000..e679001bc1 --- /dev/null +++ b/samples/python/tutorial_code/ImgTrans/distance_transformation/imageSegmentation.py @@ -0,0 +1,138 @@ +from __future__ import print_function +import cv2 as cv +import numpy as np +import argparse +import random as rng + +rng.seed(12345) + +## [load_image] +# Load the image +parser = argparse.ArgumentParser(description='Code for Image Segmentation with Distance Transform and Watershed Algorithm.\ + Sample code showing how to segment overlapping objects using Laplacian filtering, \ + in addition to Watershed and Distance Transformation') +parser.add_argument('--input', help='Path to input image.', default='../data/cards.png') +args = parser.parse_args() + +src = cv.imread(args.input) +if src is None: + print('Could not open or find the image:', args.input) + exit(0) + +# Show source image +cv.imshow('Source Image', src) +## [load_image] + +## [black_bg] +# Change the background from white to black, since that will help later to extract +# better results during the use of Distance Transform +src[np.all(src == 255, axis=2)] = 0 + +# Show output image +cv.imshow('Black Background Image', src) +## [black_bg] + +## [sharp] +# Create a kernel that we will use to sharpen our image +# an approximation of second derivative, a quite strong kernel +kernel = np.array([[1, 1, 1], [1, -8, 1], [1, 1, 1]], dtype=np.float32) + +# do the laplacian filtering as it is +# well, we need to convert everything in something more deeper then CV_8U +# because the kernel has some 
negative values, +# and we can expect in general to have a Laplacian image with negative values +# BUT a 8bits unsigned int (the one we are working with) can contain values from 0 to 255 +# so the possible negative number will be truncated +imgLaplacian = cv.filter2D(src, cv.CV_32F, kernel) +sharp = np.float32(src) +imgResult = sharp - imgLaplacian + +# convert back to 8bits gray scale +imgResult = np.clip(imgResult, 0, 255) +imgResult = imgResult.astype('uint8') +imgLaplacian = np.clip(imgLaplacian, 0, 255) +imgLaplacian = np.uint8(imgLaplacian) + +#cv.imshow('Laplace Filtered Image', imgLaplacian) +cv.imshow('New Sharped Image', imgResult) +## [sharp] + +## [bin] +# Create binary image from source image +bw = cv.cvtColor(imgResult, cv.COLOR_BGR2GRAY) +_, bw = cv.threshold(bw, 40, 255, cv.THRESH_BINARY | cv.THRESH_OTSU) +cv.imshow('Binary Image', bw) +## [bin] + +## [dist] +# Perform the distance transform algorithm +dist = cv.distanceTransform(bw, cv.DIST_L2, 3) + +# Normalize the distance image for range = {0.0, 1.0} +# so we can visualize and threshold it +cv.normalize(dist, dist, 0, 1.0, cv.NORM_MINMAX) +cv.imshow('Distance Transform Image', dist) +## [dist] + +## [peaks] +# Threshold to obtain the peaks +# This will be the markers for the foreground objects +_, dist = cv.threshold(dist, 0.4, 1.0, cv.THRESH_BINARY) + +# Dilate a bit the dist image +kernel1 = np.ones((3,3), dtype=np.uint8) +dist = cv.dilate(dist, kernel1) +cv.imshow('Peaks', dist) +## [peaks] + +## [seeds] +# Create the CV_8U version of the distance image +# It is needed for findContours() +dist_8u = dist.astype('uint8') + +# Find total markers +_, contours, _ = cv.findContours(dist_8u, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE) + +# Create the marker image for the watershed algorithm +markers = np.zeros(dist.shape, dtype=np.int32) + +# Draw the foreground markers +for i in range(len(contours)): + cv.drawContours(markers, contours, i, (i+1), -1) + +# Draw the background marker 
+cv.circle(markers, (5,5), 3, (255,255,255), -1) +cv.imshow('Markers', markers*10000) +## [seeds] + +## [watershed] +# Perform the watershed algorithm +cv.watershed(imgResult, markers) + +#mark = np.zeros(markers.shape, dtype=np.uint8) +mark = markers.astype('uint8') +mark = cv.bitwise_not(mark) +# uncomment this if you want to see how the mark +# image looks like at that point +#cv.imshow('Markers_v2', mark) + +# Generate random colors +colors = [] +for contour in contours: + colors.append((rng.randint(0,256), rng.randint(0,256), rng.randint(0,256))) + +# Create the result image +dst = np.zeros((markers.shape[0], markers.shape[1], 3), dtype=np.uint8) + +# Fill labeled objects with random colors +for i in range(markers.shape[0]): + for j in range(markers.shape[1]): + index = markers[i,j] + if index > 0 and index <= len(contours): + dst[i,j,:] = colors[index-1] + +# Visualize the final image +cv.imshow('Final Result', dst) +## [watershed] + +cv.waitKey() diff --git a/samples/python/tutorial_code/features2D/feature_flann_matcher/SURF_FLANN_matching_Demo.py b/samples/python/tutorial_code/features2D/feature_flann_matcher/SURF_FLANN_matching_Demo.py index d22f9a8a6f..1a65d324fd 100644 --- a/samples/python/tutorial_code/features2D/feature_flann_matcher/SURF_FLANN_matching_Demo.py +++ b/samples/python/tutorial_code/features2D/feature_flann_matcher/SURF_FLANN_matching_Demo.py @@ -28,10 +28,9 @@ knn_matches = matcher.knnMatch(descriptors1, descriptors2, 2) #-- Filter matches using the Lowe's ratio test ratio_thresh = 0.7 good_matches = [] -for matches in knn_matches: - if len(matches) > 1: - if matches[0].distance / matches[1].distance <= ratio_thresh: - good_matches.append(matches[0]) +for m,n in knn_matches: + if m.distance / n.distance <= ratio_thresh: + good_matches.append(m) #-- Draw matches img_matches = np.empty((max(img1.shape[0], img2.shape[0]), img1.shape[1]+img2.shape[1], 3), dtype=np.uint8) diff --git 
a/samples/python/tutorial_code/features2D/feature_homography/SURF_FLANN_matching_homography_Demo.py b/samples/python/tutorial_code/features2D/feature_homography/SURF_FLANN_matching_homography_Demo.py index 8820addce2..5172b4f303 100644 --- a/samples/python/tutorial_code/features2D/feature_homography/SURF_FLANN_matching_homography_Demo.py +++ b/samples/python/tutorial_code/features2D/feature_homography/SURF_FLANN_matching_homography_Demo.py @@ -28,10 +28,9 @@ knn_matches = matcher.knnMatch(descriptors_obj, descriptors_scene, 2) #-- Filter matches using the Lowe's ratio test ratio_thresh = 0.75 good_matches = [] -for matches in knn_matches: - if len(matches) > 1: - if matches[0].distance / matches[1].distance <= ratio_thresh: - good_matches.append(matches[0]) +for m,n in knn_matches: + if m.distance / n.distance <= ratio_thresh: + good_matches.append(m) #-- Draw matches img_matches = np.empty((max(img_object.shape[0], img_scene.shape[0]), img_object.shape[1]+img_scene.shape[1], 3), dtype=np.uint8)