diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51e0b19f94..d01c06d720 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -275,7 +275,7 @@ OCV_OPTION(WITH_VA "Include VA support" OFF
 OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
 OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF IF ((UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW)) )
 OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
-OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" ON IF (UNIX AND NOT ANDROID AND NOT IOS) )
+OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" OFF IF (UNIX AND NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_LAPACK "Include Lapack library support" (NOT CV_DISABLE_OPTIMIZATION) IF (NOT ANDROID AND NOT IOS) )
 OCV_OPTION(WITH_ITT "Include Intel ITT support" ON IF (NOT APPLE_FRAMEWORK) )
 OCV_OPTION(WITH_PROTOBUF "Enable libprotobuf" ON )
diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake
index e5e64fc6db..e36eb0852b 100644
--- a/cmake/OpenCVDetectInferenceEngine.cmake
+++ b/cmake/OpenCVDetectInferenceEngine.cmake
@@ -78,9 +78,9 @@ endif()
 if(INF_ENGINE_TARGET)
   if(NOT INF_ENGINE_RELEASE)
-    message(WARNING "InferenceEngine version have not been set, 2018R2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+    message(WARNING "InferenceEngine version have not been set, 2018R3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
   endif()
-  set(INF_ENGINE_RELEASE "2018020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
+  set(INF_ENGINE_RELEASE "2018030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
   set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
       INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
   )
diff --git a/cmake/OpenCVFindVA.cmake b/cmake/OpenCVFindVA.cmake
index 5079e56316..37e916d1fd 100644
--- a/cmake/OpenCVFindVA.cmake
+++ b/cmake/OpenCVFindVA.cmake
@@ -12,7 +12,9 @@ endif()
 if(VA_INCLUDE_DIR)
     set(HAVE_VA TRUE)
-    set(VA_LIBRARIES "-lva" "-lva-drm")
+    if(NOT DEFINED VA_LIBRARIES)
+        set(VA_LIBRARIES "va" "va-drm")
+    endif()
 else()
     set(HAVE_VA FALSE)
     message(WARNING "libva installation is not found.")
diff --git a/doc/tutorials/dnn/dnn_android/dnn_android.markdown b/doc/tutorials/dnn/dnn_android/dnn_android.markdown
index 0fed10e487..a432b38204 100644
--- a/doc/tutorials/dnn/dnn_android/dnn_android.markdown
+++ b/doc/tutorials/dnn/dnn_android/dnn_android.markdown
@@ -12,7 +12,7 @@ Tutorial was written for the following versions of corresponding software:
 
 - Download and install Android Studio from https://developer.android.com/studio.
 
-- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.2-android-sdk.zip`).
+- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.3-android-sdk.zip`).
 
 - Download MobileNet object detection model from https://github.com/chuanqi305/MobileNet-SSD. We need a configuration file `MobileNetSSD_deploy.prototxt` and weights `MobileNetSSD_deploy.caffemodel`.
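
Note on the build options touched above: WITH_GPHOTO2 now defaults to OFF, the default Inference Engine release becomes 2018R3 (2018030000), and VA_LIBRARIES is only assigned when the user has not already defined it. A minimal configure-time sketch of overriding these knobs (the source path is illustrative, not part of this patch):

    cmake -DWITH_GPHOTO2=ON \
          -DINF_ENGINE_RELEASE=2018030000 \
          -DWITH_VA=ON \
          -DVA_LIBRARIES="va;va-drm" \
          ../opencv
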
diff --git a/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown b/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown index ce6c868165..ec44a0f59f 100644 --- a/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown +++ b/doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown @@ -36,14 +36,14 @@ Open your Doxyfile using your favorite text editor and search for the key `TAGFILES`. Change it as follows: @code -TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2 +TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.3 @endcode If you had other definitions already, you can append the line using a `\`: @code TAGFILES = ./docs/doxygen-tags/libstdc++.tag=https://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen \ - ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2 + ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.3 @endcode Doxygen can now use the information from the tag file to link to the OpenCV diff --git a/modules/core/misc/java/src/java/core+MatOfRotatedRect.java b/modules/core/misc/java/src/java/core+MatOfRotatedRect.java new file mode 100644 index 0000000000..6f36e6ca6c --- /dev/null +++ b/modules/core/misc/java/src/java/core+MatOfRotatedRect.java @@ -0,0 +1,86 @@ +package org.opencv.core; + +import java.util.Arrays; +import java.util.List; + +import org.opencv.core.RotatedRect; + + + +public class MatOfRotatedRect extends Mat { + // 32FC5 + private static final int _depth = CvType.CV_32F; + private static final int _channels = 5; + + public MatOfRotatedRect() { + super(); + } + + protected MatOfRotatedRect(long addr) { + super(addr); + if( !empty() && checkVector(_channels, _depth) < 0 ) + throw new IllegalArgumentException("Incompatible Mat"); + //FIXME: do we need release() here? + } + + public static MatOfRotatedRect fromNativeAddr(long addr) { + return new MatOfRotatedRect(addr); + } + + public MatOfRotatedRect(Mat m) { + super(m, Range.all()); + if( !empty() && checkVector(_channels, _depth) < 0 ) + throw new IllegalArgumentException("Incompatible Mat"); + //FIXME: do we need release() here? 
+ } + + public MatOfRotatedRect(RotatedRect...a) { + super(); + fromArray(a); + } + + public void alloc(int elemNumber) { + if(elemNumber>0) + super.create(elemNumber, 1, CvType.makeType(_depth, _channels)); + } + + public void fromArray(RotatedRect...a) { + if(a==null || a.length==0) + return; + int num = a.length; + alloc(num); + float buff[] = new float[num * _channels]; + for(int i=0; i lr) { + RotatedRect ap[] = lr.toArray(new RotatedRect[0]); + fromArray(ap); + } + + public List toList() { + RotatedRect[] ar = toArray(); + return Arrays.asList(ar); + } +} diff --git a/modules/core/misc/java/test/RotatedRectTest.java b/modules/core/misc/java/test/RotatedRectTest.java index fc215fc661..b1b4a677d3 100644 --- a/modules/core/misc/java/test/RotatedRectTest.java +++ b/modules/core/misc/java/test/RotatedRectTest.java @@ -1,11 +1,16 @@ package org.opencv.test.core; +import org.opencv.core.CvType; import org.opencv.core.Point; import org.opencv.core.Rect; import org.opencv.core.RotatedRect; +import org.opencv.core.MatOfRotatedRect; import org.opencv.core.Size; import org.opencv.test.OpenCVTestCase; +import java.util.Arrays; +import java.util.List; + public class RotatedRectTest extends OpenCVTestCase { private double angle; @@ -188,4 +193,21 @@ public class RotatedRectTest extends OpenCVTestCase { assertEquals(expected, actual); } + public void testMatOfRotatedRect() { + RotatedRect a = new RotatedRect(new Point(1,2),new Size(3,4),5.678); + RotatedRect b = new RotatedRect(new Point(9,8),new Size(7,6),5.432); + MatOfRotatedRect m = new MatOfRotatedRect(a,b,a,b,a,b,a,b); + assertEquals(m.rows(), 8); + assertEquals(m.cols(), 1); + assertEquals(m.type(), CvType.CV_32FC(5)); + RotatedRect[] arr = m.toArray(); + assertEquals(arr[2].angle, a.angle, EPS); + assertEquals(arr[3].center.x, b.center.x); + assertEquals(arr[3].size.width, b.size.width); + List li = m.toList(); + assertEquals(li.size(), 8); + RotatedRect rr = li.get(7); + assertEquals(rr.angle, b.angle, EPS); + assertEquals(rr.center.y, b.center.y); + } } diff --git a/modules/core/src/va_intel.cpp b/modules/core/src/va_intel.cpp index 0a2bfd96a3..a3baa4bf0b 100644 --- a/modules/core/src/va_intel.cpp +++ b/modules/core/src/va_intel.cpp @@ -324,6 +324,163 @@ static void copy_convert_bgr_to_nv12(const VAImage& image, const Mat& bgr, unsig dstUV += dstStepUV; } } + + +static void copy_convert_yv12_to_bgr(const VAImage& image, const unsigned char* buffer, Mat& bgr) +{ + const float d1 = 16.0f; + const float d2 = 128.0f; + + static const float coeffs[5] = + { + 1.163999557f, + 2.017999649f, + -0.390999794f, + -0.812999725f, + 1.5959997177f + }; + + CV_CheckEQ(image.format.fourcc, VA_FOURCC_YV12, "Unexpected image format"); + CV_CheckEQ(image.num_planes, 3, ""); + + const size_t srcOffsetY = image.offsets[0]; + const size_t srcOffsetV = image.offsets[1]; + const size_t srcOffsetU = image.offsets[2]; + + const size_t srcStepY = image.pitches[0]; + const size_t srcStepU = image.pitches[1]; + const size_t srcStepV = image.pitches[2]; + + const size_t dstStep = bgr.step; + + const unsigned char* srcY_ = buffer + srcOffsetY; + const unsigned char* srcV_ = buffer + srcOffsetV; + const unsigned char* srcU_ = buffer + srcOffsetU; + + for (int y = 0; y < bgr.rows; y += 2) + { + const unsigned char* srcY0 = srcY_ + (srcStepY) * y; + const unsigned char* srcY1 = srcY0 + srcStepY; + + const unsigned char* srcV = srcV_ + (srcStepV) * y / 2; + const unsigned char* srcU = srcU_ + (srcStepU) * y / 2; + + unsigned char* dst0 = bgr.data + (dstStep) * y; + unsigned 
char* dst1 = dst0 + dstStep; + + for (int x = 0; x < bgr.cols; x += 2) + { + float Y0 = float(srcY0[x+0]); + float Y1 = float(srcY0[x+1]); + float Y2 = float(srcY1[x+0]); + float Y3 = float(srcY1[x+1]); + + float U = float(srcU[x/2]) - d2; + float V = float(srcV[x/2]) - d2; + + Y0 = std::max(0.0f, Y0 - d1) * coeffs[0]; + Y1 = std::max(0.0f, Y1 - d1) * coeffs[0]; + Y2 = std::max(0.0f, Y2 - d1) * coeffs[0]; + Y3 = std::max(0.0f, Y3 - d1) * coeffs[0]; + + float ruv = coeffs[4]*V; + float guv = coeffs[3]*V + coeffs[2]*U; + float buv = coeffs[1]*U; + + dst0[(x+0)*NCHANNELS+0] = saturate_cast(Y0 + buv); + dst0[(x+0)*NCHANNELS+1] = saturate_cast(Y0 + guv); + dst0[(x+0)*NCHANNELS+2] = saturate_cast(Y0 + ruv); + + dst0[(x+1)*NCHANNELS+0] = saturate_cast(Y1 + buv); + dst0[(x+1)*NCHANNELS+1] = saturate_cast(Y1 + guv); + dst0[(x+1)*NCHANNELS+2] = saturate_cast(Y1 + ruv); + + dst1[(x+0)*NCHANNELS+0] = saturate_cast(Y2 + buv); + dst1[(x+0)*NCHANNELS+1] = saturate_cast(Y2 + guv); + dst1[(x+0)*NCHANNELS+2] = saturate_cast(Y2 + ruv); + + dst1[(x+1)*NCHANNELS+0] = saturate_cast(Y3 + buv); + dst1[(x+1)*NCHANNELS+1] = saturate_cast(Y3 + guv); + dst1[(x+1)*NCHANNELS+2] = saturate_cast(Y3 + ruv); + } + } +} + +static void copy_convert_bgr_to_yv12(const VAImage& image, const Mat& bgr, unsigned char* buffer) +{ + const float d1 = 16.0f; + const float d2 = 128.0f; + + static const float coeffs[8] = + { + 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, + -0.2909994125f, 0.438999176f, -0.3679990768f, -0.0709991455f + }; + + CV_CheckEQ(image.format.fourcc, VA_FOURCC_YV12, "Unexpected image format"); + CV_CheckEQ(image.num_planes, 3, ""); + + const size_t dstOffsetY = image.offsets[0]; + const size_t dstOffsetV = image.offsets[1]; + const size_t dstOffsetU = image.offsets[2]; + + const size_t dstStepY = image.pitches[0]; + const size_t dstStepU = image.pitches[1]; + const size_t dstStepV = image.pitches[2]; + + unsigned char* dstY_ = buffer + dstOffsetY; + unsigned char* dstV_ = buffer + dstOffsetV; + unsigned char* dstU_ = buffer + dstOffsetU; + + const size_t srcStep = bgr.step; + + for (int y = 0; y < bgr.rows; y += 2) + { + unsigned char* dstY0 = dstY_ + (dstStepY) * y; + unsigned char* dstY1 = dstY0 + dstStepY; + + unsigned char* dstV = dstV_ + (dstStepV) * y / 2; + unsigned char* dstU = dstU_ + (dstStepU) * y / 2; + + const unsigned char* src0 = bgr.data + (srcStep) * y; + const unsigned char* src1 = src0 + srcStep; + + for (int x = 0; x < bgr.cols; x += 2) + { + float B0 = float(src0[(x+0)*NCHANNELS+0]); + float G0 = float(src0[(x+0)*NCHANNELS+1]); + float R0 = float(src0[(x+0)*NCHANNELS+2]); + + float B1 = float(src0[(x+1)*NCHANNELS+0]); + float G1 = float(src0[(x+1)*NCHANNELS+1]); + float R1 = float(src0[(x+1)*NCHANNELS+2]); + + float B2 = float(src1[(x+0)*NCHANNELS+0]); + float G2 = float(src1[(x+0)*NCHANNELS+1]); + float R2 = float(src1[(x+0)*NCHANNELS+2]); + + float B3 = float(src1[(x+1)*NCHANNELS+0]); + float G3 = float(src1[(x+1)*NCHANNELS+1]); + float R3 = float(src1[(x+1)*NCHANNELS+2]); + + float Y0 = coeffs[0]*R0 + coeffs[1]*G0 + coeffs[2]*B0 + d1; + float Y1 = coeffs[0]*R1 + coeffs[1]*G1 + coeffs[2]*B1 + d1; + float Y2 = coeffs[0]*R2 + coeffs[1]*G2 + coeffs[2]*B2 + d1; + float Y3 = coeffs[0]*R3 + coeffs[1]*G3 + coeffs[2]*B3 + d1; + + float U = coeffs[3]*R0 + coeffs[4]*G0 + coeffs[5]*B0 + d2; + float V = coeffs[5]*R0 + coeffs[6]*G0 + coeffs[7]*B0 + d2; + + dstY0[x+0] = saturate_cast(Y0); + dstY0[x+1] = saturate_cast(Y1); + dstY1[x+0] = saturate_cast(Y2); + dstY1[x+1] = saturate_cast(Y3); + + 
dstU[x/2] = saturate_cast(U); + dstV[x/2] = saturate_cast(V); + } + } +} #endif // HAVE_VA void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size) @@ -412,9 +569,12 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, if (status != VA_STATUS_SUCCESS) CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed"); - CV_Assert(image.format.fourcc == VA_FOURCC_NV12); - - copy_convert_bgr_to_nv12(image, m, buffer); + if (image.format.fourcc == VA_FOURCC_NV12) + copy_convert_bgr_to_nv12(image, m, buffer); + if (image.format.fourcc == VA_FOURCC_YV12) + copy_convert_bgr_to_yv12(image, m, buffer); + else + CV_Check((int)image.format.fourcc, image.format.fourcc == VA_FOURCC_NV12 || image.format.fourcc == VA_FOURCC_YV12, "Unexpected image format"); status = vaUnmapBuffer(display, image.buf); if (status != VA_STATUS_SUCCESS) @@ -510,9 +670,12 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out if (status != VA_STATUS_SUCCESS) CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed"); - CV_Assert(image.format.fourcc == VA_FOURCC_NV12); - - copy_convert_nv12_to_bgr(image, buffer, m); + if (image.format.fourcc == VA_FOURCC_NV12) + copy_convert_nv12_to_bgr(image, buffer, m); + if (image.format.fourcc == VA_FOURCC_YV12) + copy_convert_yv12_to_bgr(image, buffer, m); + else + CV_Check((int)image.format.fourcc, image.format.fourcc == VA_FOURCC_NV12 || image.format.fourcc == VA_FOURCC_YV12, "Unexpected image format"); status = vaUnmapBuffer(display, image.buf); if (status != VA_STATUS_SUCCESS) diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index b5117154a7..c81f8d83e1 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2158,4 +2158,71 @@ TEST(Core_Norm, IPP_regression_NORM_L1_16UC3_small) EXPECT_EQ((double)20*cn, cv::norm(a, b, NORM_L1, mask)); } + +TEST(Core_ConvertTo, regression_12121) +{ + { + Mat src(4, 64, CV_32SC1, Scalar(-1)); + Mat dst; + src.convertTo(dst, CV_8U); + EXPECT_EQ(0, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(INT_MIN)); + Mat dst; + src.convertTo(dst, CV_8U); + EXPECT_EQ(0, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32767)); + Mat dst; + src.convertTo(dst, CV_8U); + EXPECT_EQ(0, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32768)); + Mat dst; + src.convertTo(dst, CV_8U); + EXPECT_EQ(0, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(32768)); + Mat dst; + src.convertTo(dst, CV_8U); + EXPECT_EQ(255, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(INT_MIN)); + Mat dst; + src.convertTo(dst, CV_16U); + EXPECT_EQ(0, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32767)); + Mat dst; + src.convertTo(dst, CV_16U); + EXPECT_EQ(0, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32768)); + Mat dst; + src.convertTo(dst, CV_16U); + EXPECT_EQ(0, dst.at(0, 0)) << "src=" << src.at(0, 0); + } + + { + Mat src(4, 64, CV_32SC1, Scalar(65536)); + Mat dst; + src.convertTo(dst, CV_16U); + EXPECT_EQ(65535, dst.at(0, 0)) << "src=" << src.at(0, 0); + } +} + }} // namespace diff --git a/modules/cudaarithm/src/cuda/math.cu b/modules/cudaarithm/src/cuda/math.cu index 41d762f6a6..b885319659 100644 --- a/modules/cudaarithm/src/cuda/math.cu +++ 
b/modules/cudaarithm/src/cuda/math.cu @@ -278,20 +278,12 @@ namespace { template::is_signed> struct PowOp : unary_function { - float power; + typedef typename LargerType::type LargerType; + LargerType power; __device__ __forceinline__ T operator()(T e) const { - return cudev::saturate_cast(__powf((float)e, power)); - } - }; - template struct PowOp : unary_function - { - float power; - - __device__ __forceinline__ T operator()(T e) const - { - T res = cudev::saturate_cast(__powf((float)e, power)); + T res = cudev::saturate_cast(__powf(e < 0 ? -e : e, power)); if ((e < 0) && (1 & static_cast(power))) res *= -1; @@ -299,22 +291,15 @@ namespace return res; } }; - template<> struct PowOp : unary_function - { - float power; - __device__ __forceinline__ float operator()(float e) const - { - return __powf(::fabs(e), power); - } - }; - template<> struct PowOp : unary_function + template struct PowOp : unary_function { - double power; + typedef typename LargerType::type LargerType; + LargerType power; - __device__ __forceinline__ double operator()(double e) const + __device__ __forceinline__ T operator()(T e) const { - return ::pow(::fabs(e), power); + return cudev::saturate_cast(__powf(e, power)); } }; diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index c737177128..e99f8448fd 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -46,9 +46,9 @@ #include #if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS -#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v6 { +#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v7 { #define CV__DNN_EXPERIMENTAL_NS_END } -namespace cv { namespace dnn { namespace experimental_dnn_v6 { } using namespace experimental_dnn_v6; }} +namespace cv { namespace dnn { namespace experimental_dnn_34_v7 { } using namespace experimental_dnn_34_v7; }} #else #define CV__DNN_EXPERIMENTAL_NS_BEGIN #define CV__DNN_EXPERIMENTAL_NS_END @@ -900,7 +900,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_OUT std::vector& indices, const float eta = 1.f, const int top_k = 0); - CV_EXPORTS void NMSBoxes(const std::vector& bboxes, const std::vector& scores, + CV_EXPORTS_AS(NMSBoxesRotated) void NMSBoxes(const std::vector& bboxes, const std::vector& scores, const float score_threshold, const float nms_threshold, CV_OUT std::vector& indices, const float eta = 1.f, const int top_k = 0); diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index ad4741d008..bc18695521 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -699,9 +699,9 @@ public: } } - void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate, bool use_half) + void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half) { - if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate) + if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS) { Mat bestBlob; LayerPin bestBlobPin; @@ -747,7 +747,7 @@ public: void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, std::vector& pinsForInternalBlobs, - bool forceCreate = false, bool use_half = false) + bool use_half = false) { CV_TRACE_FUNCTION(); @@ -818,7 +818,7 @@ public: reuse(ld.inputBlobsId[0], blobPin); } else - reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate, use_half); + reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half); } } } @@ -1607,7 +1607,6 @@ struct Net::Impl std::vector pinsForInternalBlobs; blobManager.allocateBlobsForLayer(ld, 
layerShapesIt->second, pinsForInternalBlobs, - preferableBackend == DNN_BACKEND_INFERENCE_ENGINE, preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16); ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 02f5ac8d60..169e280840 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -81,6 +81,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE) { if (type == "Convolution") @@ -91,13 +92,19 @@ public: const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout const int group = numOutput / outGroupCn; if (group != 1) + { +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R3) + return preferableTarget == DNN_TARGET_CPU; +#endif return false; + } if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16) return dilation.width == 1 && dilation.height == 1; return true; } } else +#endif // HAVE_INF_ENGINE return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; } diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index f3aa7a8453..b79fb89a8a 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -99,6 +99,13 @@ public: } } } + if (boxes.rows < out.size[0]) + { + // left = top = right = bottom = 0 + std::vector dstRanges(4, Range::all()); + dstRanges[0] = Range(boxes.rows, out.size[0]); + out(dstRanges).setTo(inp.ptr(0, 0, 0)[0]); + } } private: diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 42a6a6c715..a3879128f9 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -115,6 +115,7 @@ public: // It's true whenever predicted bounding boxes and proposals are normalized to [0, 1]. 
bool _bboxesNormalized; bool _clip; + bool _groupByClasses; enum { _numAxes = 4 }; static const std::string _layerName; @@ -183,6 +184,7 @@ public: _locPredTransposed = getParameter(params, "loc_pred_transposed", 0, false, false); _bboxesNormalized = getParameter(params, "normalized_bbox", 0, false, true); _clip = getParameter(params, "clip", 0, false, false); + _groupByClasses = getParameter(params, "group_by_classes", 0, false, true); getCodeType(params); @@ -381,7 +383,7 @@ public: { count += outputDetections_(i, &outputsData[count * 7], allDecodedBBoxes[i], allConfidenceScores[i], - allIndices[i]); + allIndices[i], _groupByClasses); } CV_Assert(count == numKept); } @@ -497,7 +499,7 @@ public: { count += outputDetections_(i, &outputsData[count * 7], allDecodedBBoxes[i], allConfidenceScores[i], - allIndices[i]); + allIndices[i], _groupByClasses); } CV_Assert(count == numKept); } @@ -505,9 +507,36 @@ public: size_t outputDetections_( const int i, float* outputsData, const LabelBBox& decodeBBoxes, Mat& confidenceScores, - const std::map >& indicesMap + const std::map >& indicesMap, + bool groupByClasses ) { + std::vector dstIndices; + std::vector > allScores; + for (std::map >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it) + { + int label = it->first; + if (confidenceScores.rows <= label) + CV_Error_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label)); + const std::vector& scores = confidenceScores.row(label); + const std::vector& indices = it->second; + + const int numAllScores = allScores.size(); + allScores.reserve(numAllScores + indices.size()); + for (size_t j = 0; j < indices.size(); ++j) + { + allScores.push_back(std::make_pair(scores[indices[j]], numAllScores + j)); + } + } + if (!groupByClasses) + std::sort(allScores.begin(), allScores.end(), util::SortScorePairDescend); + + dstIndices.resize(allScores.size()); + for (size_t j = 0; j < dstIndices.size(); ++j) + { + dstIndices[allScores[j].second] = j; + } + size_t count = 0; for (std::map >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it) { @@ -524,14 +553,15 @@ public: for (size_t j = 0; j < indices.size(); ++j, ++count) { int idx = indices[j]; + int dstIdx = dstIndices[count]; const util::NormalizedBBox& decode_bbox = label_bboxes->second[idx]; - outputsData[count * 7] = i; - outputsData[count * 7 + 1] = label; - outputsData[count * 7 + 2] = scores[idx]; - outputsData[count * 7 + 3] = decode_bbox.xmin; - outputsData[count * 7 + 4] = decode_bbox.ymin; - outputsData[count * 7 + 5] = decode_bbox.xmax; - outputsData[count * 7 + 6] = decode_bbox.ymax; + outputsData[dstIdx * 7] = i; + outputsData[dstIdx * 7 + 1] = label; + outputsData[dstIdx * 7 + 2] = scores[idx]; + outputsData[dstIdx * 7 + 3] = decode_bbox.xmin; + outputsData[dstIdx * 7 + 4] = decode_bbox.ymin; + outputsData[dstIdx * 7 + 5] = decode_bbox.xmax; + outputsData[dstIdx * 7 + 6] = decode_bbox.ymax; } } return count; diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index c95bdcd509..0a5ed54ca8 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -599,7 +599,8 @@ struct ELUFunctor bool supportBackend(int backendId, int) { - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE; + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE; } void apply(const float* srcptr, float* dstptr, int len, 
size_t planeSize, int cn0, int cn1) const @@ -653,8 +654,8 @@ struct ELUFunctor #ifdef HAVE_INF_ENGINE InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp) { - CV_Error(Error::StsNotImplemented, "ELU"); - return InferenceEngine::CNNLayerPtr(); + lp.type = "ELU"; + return InferenceEngine::CNNLayerPtr(new InferenceEngine::CNNLayer(lp)); } #endif // HAVE_INF_ENGINE diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 8ff8390bea..8d9f28dbb3 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -91,8 +91,8 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_HALIDE && haveHalide() || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); + backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE && (preferableTarget != DNN_TARGET_MYRIAD || type == CHANNEL_NRM); } #ifdef HAVE_OPENCL diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 5ec5d40e54..dab62f12f7 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -33,9 +33,7 @@ public: interpolation = params.get("interpolation"); CV_Assert(interpolation == "nearest" || interpolation == "bilinear"); - bool alignCorners = params.get("align_corners", false); - if (alignCorners) - CV_Error(Error::StsNotImplemented, "Resize with align_corners=true is not implemented"); + alignCorners = params.get("align_corners", false); } bool getMemoryShapes(const std::vector &inputs, @@ -66,8 +64,15 @@ public: outHeight = outputs[0].size[2]; outWidth = outputs[0].size[3]; } - scaleHeight = static_cast(inputs[0]->size[2]) / outHeight; - scaleWidth = static_cast(inputs[0]->size[3]) / outWidth; + if (alignCorners && outHeight > 1) + scaleHeight = static_cast(inputs[0]->size[2] - 1) / (outHeight - 1); + else + scaleHeight = static_cast(inputs[0]->size[2]) / outHeight; + + if (alignCorners && outWidth > 1) + scaleWidth = static_cast(inputs[0]->size[3] - 1) / (outWidth - 1); + else + scaleWidth = static_cast(inputs[0]->size[3]) / outWidth; } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE @@ -166,6 +171,7 @@ protected: int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight; String interpolation; float scaleWidth, scaleHeight; + bool alignCorners; }; diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index a811f4eae7..841cb13e13 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -24,6 +24,7 @@ #define INF_ENGINE_RELEASE_2018R1 2018010000 #define INF_ENGINE_RELEASE_2018R2 2018020000 +#define INF_ENGINE_RELEASE_2018R3 2018030000 #ifndef INF_ENGINE_RELEASE #warning("IE version have not been provided via command-line. 
Using 2018R2 by default") @@ -31,6 +32,7 @@ #endif #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000)) +#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000)) #endif // HAVE_INF_ENGINE diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 97701a1826..264d3cbc86 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -737,11 +737,18 @@ void TFImporter::populateNet(Net dstNet) int predictedLayout = predictOutputDataLayout(net, layer, data_layouts); data_layouts[name] = predictedLayout; - if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative") + if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad") { // The first node of dilated convolution subgraph. // Extract input node, dilation rate and paddings. std::string input = layer.input(0); + StrIntVector next_layers; + if (type == "SpaceToBatchND" || type == "Pad") + { + next_layers = getNextLayers(net, name, "Conv2D"); + if (next_layers.empty()) + next_layers = getNextLayers(net, name, "DepthwiseConv2dNative"); + } if (type == "SpaceToBatchND") { // op: "SpaceToBatchND" @@ -762,17 +769,57 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("pad_h", paddings.at(0)); layerParams.set("pad_w", paddings.at(2)); - StrIntVector next_layers = getNextLayers(net, name, "Conv2D"); - if (next_layers.empty()) - { - next_layers = getNextLayers(net, name, "DepthwiseConv2dNative"); - } CV_Assert(next_layers.size() == 1); layer = net.node(next_layers[0].second); layers_to_ignore.insert(next_layers[0].first); name = layer.name(); type = layer.op(); } + else if (type == "Pad") + { + Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(paddings.type() == CV_32SC1); + if (paddings.total() == 8) + { + // Perhabs, we have NHWC padding dimensions order. + // N H W C + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(2), paddings.at(6)); + std::swap(paddings.at(3), paddings.at(7)); + // N C W H + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(4), paddings.at(6)); + std::swap(paddings.at(5), paddings.at(7)); + // N C H W + // 0 1 2 3 4 5 6 7 + } + if (next_layers.empty() || paddings.total() != 8 || + paddings.at(4) != paddings.at(5) || + paddings.at(6) != paddings.at(7)) + { + // Just a single padding layer. + layerParams.set("paddings", DictValue::arrayInt((int*)paddings.data, paddings.total())); + + int id = dstNet.addLayer(name, "Padding", layerParams); + layer_id[name] = id; + + connect(layer_id, dstNet, parsePin(input), id, 0); + continue; + } + else + { + // Merge with subsequent convolutional layer. 
+ CV_Assert(next_layers.size() == 1); + + layerParams.set("pad_h", paddings.at(4)); + layerParams.set("pad_w", paddings.at(6)); + + layer = net.node(next_layers[0].second); + layers_to_ignore.insert(next_layers[0].first); + name = layer.name(); + type = layer.op(); + } + } // For the object detection networks, TensorFlow Object Detection API // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) @@ -784,7 +831,7 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("bias_term", false); layerParams.blobs.resize(1); - StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); + next_layers = getNextLayers(net, name, "BiasAdd"); if (next_layers.size() == 1) { layerParams.set("bias_term", true); layerParams.blobs.resize(2); @@ -1416,31 +1463,6 @@ void TFImporter::populateNet(Net dstNet) } } } - else if (type == "Pad") - { - Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(paddings.type() == CV_32SC1); - if (paddings.total() == 8) - { - // Perhabs, we have NHWC padding dimensions order. - // N H W C - // 0 1 2 3 4 5 6 7 - std::swap(*paddings.ptr(0, 2), *paddings.ptr(0, 6)); - std::swap(*paddings.ptr(0, 3), *paddings.ptr(0, 7)); - // N C W H - // 0 1 2 3 4 5 6 7 - std::swap(*paddings.ptr(0, 4), *paddings.ptr(0, 6)); - std::swap(*paddings.ptr(0, 5), *paddings.ptr(0, 7)); - // N C H W - // 0 1 2 3 4 5 6 7 - } - layerParams.set("paddings", DictValue::arrayInt((int*)paddings.data, paddings.total())); - - int id = dstNet.addLayer(name, "Padding", layerParams); - layer_id[name] = id; - - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } else if (type == "FusedBatchNorm") { // op: "FusedBatchNorm" diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 60beca272b..309f0010e4 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -222,9 +222,12 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages) TEST_P(DNNTestNetwork, OpenFace) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000 + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) + throw SkipTestException("Test is enabled starts from OpenVINO 2018R3"); +#endif if (backend == DNN_BACKEND_HALIDE || - (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || - (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)) + (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)) throw SkipTestException(""); processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), ""); } @@ -253,12 +256,19 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) TEST_P(DNNTestNetwork, DenseNet_121) { - if ((backend == DNN_BACKEND_HALIDE) || - (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || - (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL_FP16 || - target == DNN_TARGET_MYRIAD))) + if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); - processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "caffe"); + + float l1 = 0.0, lInf = 0.0; + if (target == DNN_TARGET_OPENCL_FP16) + { + l1 = 9e-3; lInf = 5e-2; + } + else if (target == DNN_TARGET_MYRIAD) + { + l1 = 6e-2; lInf = 0.27; + } + processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf); } TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 
b957b8caf4..4491fde5a9 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -374,14 +374,6 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy) TEST_P(Test_Caffe_nets, Colorization) { checkBackend(); - if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) || - (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) || - (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) - throw SkipTestException(""); - - const float l1 = 4e-4; - const float lInf = 3e-3; - Mat inp = blobFromNPY(_tf("colorization_inp.npy")); Mat ref = blobFromNPY(_tf("colorization_out.npy")); Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy")); @@ -398,11 +390,15 @@ TEST_P(Test_Caffe_nets, Colorization) net.setInput(inp); Mat out = net.forward(); + // Reference output values are in range [-29.1, 69.5] + const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.21 : 4e-4; + const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5.3 : 3e-3; normAssert(out, ref, "", l1, lInf); } -TEST(Reproducibility_DenseNet_121, Accuracy) +TEST_P(Test_Caffe_nets, DenseNet_121) { + checkBackend(); const string proto = findDataFile("dnn/DenseNet_121.prototxt", false); const string model = findDataFile("dnn/DenseNet_121.caffemodel", false); @@ -411,12 +407,23 @@ TEST(Reproducibility_DenseNet_121, Accuracy) Mat ref = blobFromNPY(_tf("densenet_121_output.npy")); Net net = readNetFromCaffe(proto, model); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); net.setInput(inp); Mat out = net.forward(); - normAssert(out, ref); + // Reference is an array of 1000 values from a range [-6.16, 7.9] + float l1 = default_l1, lInf = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16) + { + l1 = 0.017; lInf = 0.067; + } + else if (target == DNN_TARGET_MYRIAD) + { + l1 = 0.097; lInf = 0.52; + } + normAssert(out, ref, "", l1, lInf); } TEST(Test_Caffe, multiple_inputs) diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 9013ce9774..9fefe4fd04 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -177,7 +177,8 @@ TEST_P(DNNTestOpenVINO, models) Target target = (dnn::Target)(int)get<0>(GetParam()); std::string modelName = get<1>(GetParam()); - if (modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16) + if ((modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16) || + (modelName == "vehicle-license-plate-detection-barrier-0106")) throw SkipTestException(""); std::string precision = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
"FP16" : "FP32"; diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 93840fa20f..a31ce85d0f 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -127,15 +127,9 @@ TEST_P(Test_Caffe_layers, Softmax) testLayerUsingCaffeModels("layer_softmax"); } -TEST_P(Test_Caffe_layers, LRN_spatial) +TEST_P(Test_Caffe_layers, LRN) { - if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) - throw SkipTestException(""); testLayerUsingCaffeModels("layer_lrn_spatial"); -} - -TEST_P(Test_Caffe_layers, LRN_channels) -{ testLayerUsingCaffeModels("layer_lrn_channels"); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 33516e699a..d95f6f5081 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -399,8 +399,10 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) TEST_P(Test_TensorFlow_nets, EAST_text_detection) { checkBackend(); - if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) - throw SkipTestException(""); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000 + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) + throw SkipTestException("Test is enabled starts from OpenVINO 2018R3"); +#endif std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false); std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false); @@ -425,8 +427,25 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection) Mat scores = outs[0]; Mat geometry = outs[1]; - normAssert(scores, blobFromNPY(refScoresPath), "scores"); - normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3); + // Scores are in range [0, 1]. Geometry values are in range [-0.23, 290] + double l1_scores = default_l1, lInf_scores = default_lInf; + double l1_geometry = default_l1, lInf_geometry = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16) + { + lInf_scores = 0.11; + l1_geometry = 0.28; lInf_geometry = 5.94; + } + else if (target == DNN_TARGET_MYRIAD) + { + lInf_scores = 0.214; + l1_geometry = 0.47; lInf_geometry = 15.34; + } + else + { + l1_geometry = 1e-4, lInf_geometry = 3e-3; + } + normAssert(scores, blobFromNPY(refScoresPath), "scores", l1_scores, lInf_scores); + normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", l1_geometry, lInf_geometry); } INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets()); @@ -537,4 +556,56 @@ TEST(Test_TensorFlow, two_inputs) normAssert(out, firstInput + secondInput); } +TEST(Test_TensorFlow, Mask_RCNN) +{ + std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt", false); + std::string model = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pb", false); + + Net net = readNetFromTensorflow(model, proto); + Mat img = imread(findDataFile("dnn/street.png", false)); + Mat refDetections = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_out.npy")); + Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy")); + Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false); + + net.setPreferableBackend(DNN_BACKEND_OPENCV); + + net.setInput(blob); + + // Mask-RCNN predicts bounding boxes and segmentation masks. 
+ std::vector outNames(2); + outNames[0] = "detection_out_final"; + outNames[1] = "detection_masks"; + + std::vector outs; + net.forward(outs, outNames); + + Mat outDetections = outs[0]; + Mat outMasks = outs[1]; + normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5); + + // Output size of masks is NxCxHxW where + // N - number of detected boxes + // C - number of classes (excluding background) + // HxW - segmentation shape + const int numDetections = outDetections.size[2]; + + int masksSize[] = {1, numDetections, outMasks.size[2], outMasks.size[3]}; + Mat masks(4, &masksSize[0], CV_32F); + + std::vector srcRanges(4, cv::Range::all()); + std::vector dstRanges(4, cv::Range::all()); + + outDetections = outDetections.reshape(1, outDetections.total() / 7); + for (int i = 0; i < numDetections; ++i) + { + // Get a class id for this bounding box and copy mask only for that class. + int classId = static_cast(outDetections.at(i, 1)); + srcRanges[0] = dstRanges[1] = cv::Range(i, i + 1); + srcRanges[1] = cv::Range(classId, classId + 1); + outMasks(srcRanges).copyTo(masks(dstRanges)); + } + cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()}; + normAssert(masks, refMasks(&topRefMasks[0])); +} + } diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index c07c5b39d8..13e3ddeacb 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -242,15 +242,23 @@ TEST_P(Test_Torch_layers, net_residual) runTorchNet("net_residual", "", false, true); } -typedef testing::TestWithParam Test_Torch_nets; +class Test_Torch_nets : public DNNTestLayer {}; TEST_P(Test_Torch_nets, OpenFace_accuracy) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000 + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) + throw SkipTestException("Test is enabled starts from OpenVINO 2018R3"); +#endif + checkBackend(); + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) + throw SkipTestException(""); + const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false); Net net = readNetFromTorch(model); - net.setPreferableBackend(DNN_BACKEND_OPENCV); - net.setPreferableTarget(GetParam()); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); Mat sample = imread(findDataFile("cv/shared/lena.png", false)); Mat sampleF32(sample.size(), CV_32FC3); @@ -264,11 +272,16 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy) Mat out = net.forward(); Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true); - normAssert(out, outRef); + normAssert(out, outRef, "", default_l1, default_lInf); } TEST_P(Test_Torch_nets, ENet_accuracy) { + checkBackend(); + if (backend == DNN_BACKEND_INFERENCE_ENGINE || + (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)) + throw SkipTestException(""); + Net net; { const string model = findDataFile("dnn/Enet-model-best.net", false); @@ -276,8 +289,8 @@ TEST_P(Test_Torch_nets, ENet_accuracy) ASSERT_TRUE(!net.empty()); } - net.setPreferableBackend(DNN_BACKEND_OPENCV); - net.setPreferableTarget(GetParam()); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); Mat sample = imread(_tf("street.png", false)); Mat inputBlob = blobFromImage(sample, 1./255); @@ -314,6 +327,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy) // -model models/instance_norm/feathers.t7 TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) { + checkBackend(); 
std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7", "dnn/fast_neural_style_instance_norm_feathers.t7"}; std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"}; @@ -323,8 +337,8 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) const string model = findDataFile(models[i], false); Net net = readNetFromTorch(model); - net.setPreferableBackend(DNN_BACKEND_OPENCV); - net.setPreferableTarget(GetParam()); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); Mat img = imread(findDataFile("dnn/googlenet_1.png", false)); Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false); @@ -341,12 +355,20 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) Mat ref = imread(findDataFile(targets[i])); Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false); - normAssert(out, refBlob, "", 0.5, 1.1); + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total(); + if (target == DNN_TARGET_MYRIAD) + EXPECT_LE(normL1, 4.0f); + else + EXPECT_LE(normL1, 0.6f); + } + else + normAssert(out, refBlob, "", 0.5, 1.1); } } -INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets()); - +INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets()); // Test a custom layer // https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index a16292e7d9..15a6939727 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -199,6 +199,7 @@ typedef std::vector vector_Vec6f; typedef std::vector vector_Vec4i; typedef std::vector vector_Rect; typedef std::vector vector_Rect2d; +typedef std::vector vector_RotatedRect; typedef std::vector vector_KeyPoint; typedef std::vector vector_Mat; typedef std::vector > vector_vector_Mat; @@ -1643,6 +1644,18 @@ template<> struct pyopencvVecConverter } }; +template<> struct pyopencvVecConverter +{ + static bool to(PyObject* obj, std::vector& value, const ArgInfo info) + { + return pyopencv_to_generic_vec(obj, value, info); + } + static PyObject* from(const std::vector& value) + { + return pyopencv_from_generic_vec(value); + } +}; + template<> bool pyopencv_to(PyObject *obj, TermCriteria& dst, const char *name) { diff --git a/modules/shape/src/aff_trans.cpp b/modules/shape/src/aff_trans.cpp index 374f8a893b..3d59f5127d 100644 --- a/modules/shape/src/aff_trans.cpp +++ b/modules/shape/src/aff_trans.cpp @@ -165,8 +165,8 @@ static Mat _localAffineEstimate(const std::vector& shape1, const std::v } else { - therow.at(0,0)=-shape1[contPt].y; - therow.at(0,1)=shape1[contPt].x; + therow.at(0,0)=shape1[contPt].y; + therow.at(0,1)=-shape1[contPt].x; therow.at(0,3)=1; therow.row(0).copyTo(matM.row(ii)); matP.at(ii,0) = shape2[contPt].y; diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index fabfa1b260..66dd7656be 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -921,6 +921,14 @@ inline static void recordPropertyVerbose(const std::string & property, } } +inline static void recordPropertyVerbose(const std::string& property, const std::string& msg, + const char* value, const char* build_value = NULL) +{ + return recordPropertyVerbose(property, msg, + value ? std::string(value) : std::string(), + build_value ? 
std::string(build_value) : std::string()); +} + #ifdef _DEBUG #define CV_TEST_BUILD_CONFIG "Debug" #else diff --git a/modules/videoio/src/cap_xine.cpp b/modules/videoio/src/cap_xine.cpp index ad9c381424..e1c3d998b6 100644 --- a/modules/videoio/src/cap_xine.cpp +++ b/modules/videoio/src/cap_xine.cpp @@ -107,7 +107,7 @@ class XINECapture : public IVideoCapture bool open(const char *filename) { - CV_Assert(!xine, !stream, !vo_port); + CV_Assert_N(!xine, !stream, !vo_port); char configfile[2048] = {0}; xine = xine_new(); @@ -207,7 +207,7 @@ class XINECapture : public IVideoCapture double getProperty(int property_id) const CV_OVERRIDE { - CV_Assert(xine, vo_port, stream); + CV_Assert_N(xine, vo_port, stream); int pos_t, pos_l, length; bool res = (bool)xine_get_pos_length(stream, &pos_l, &pos_t, &length); @@ -240,7 +240,7 @@ class XINECapture : public IVideoCapture protected: bool oldSeekFrame(int f) { - CV_Assert(xine, vo_port, stream); + CV_Assert_N(xine, vo_port, stream); // no need to seek if we are already there... if (f == frame_number) { @@ -290,7 +290,7 @@ protected: bool seekFrame(int f) { - CV_Assert(xine, vo_port, stream); + CV_Assert_N(xine, vo_port, stream); if (seekable) { int new_time = (int)((f + 1) * (float)frame_duration); @@ -309,7 +309,7 @@ protected: bool seekTime(int t) { - CV_Assert(xine, vo_port, stream); + CV_Assert_N(xine, vo_port, stream); if (seekable) { if (xine_play(stream, 0, t)) @@ -328,7 +328,7 @@ protected: bool seekRatio(double ratio) { - CV_Assert(xine, vo_port, stream); + CV_Assert_N(xine, vo_port, stream); if (ratio > 1 || ratio < 0) return false; if (seekable) diff --git a/modules/viz/src/widget.cpp b/modules/viz/src/widget.cpp index ce933ed0d8..6c2789f895 100644 --- a/modules/viz/src/widget.cpp +++ b/modules/viz/src/widget.cpp @@ -301,6 +301,7 @@ void cv::viz::Widget3D::applyTransform(const Affine3d &transform) vtkSmartPointer mapper = vtkPolyDataMapper::SafeDownCast(actor->GetMapper()); CV_Assert("Widget doesn't have a polydata mapper" && mapper); + mapper->Update(); // #10945 VtkUtils::SetInputData(mapper, VtkUtils::TransformPolydata(mapper->GetInput(), transform)); mapper->Update(); } diff --git a/platforms/android/build_sdk.py b/platforms/android/build_sdk.py index bfd0be79f4..df3503e09f 100755 --- a/platforms/android/build_sdk.py +++ b/platforms/android/build_sdk.py @@ -106,9 +106,13 @@ class ABI: self.cmake_vars = dict( ANDROID_STL="gnustl_static", ANDROID_ABI=self.name, - ANDROID_TOOLCHAIN_NAME=toolchain, ANDROID_PLATFORM_ID=platform_id, ) + if toolchain is not None: + self.cmake_vars['ANDROID_TOOLCHAIN_NAME'] = toolchain + else: + self.cmake_vars['ANDROID_TOOLCHAIN'] = 'clang' + self.cmake_vars['ANDROID_STL'] = 'c++_static' if ndk_api_level: self.cmake_vars['ANDROID_NATIVE_API_LEVEL'] = ndk_api_level self.cmake_vars.update(cmake_vars) @@ -206,7 +210,7 @@ class Builder: # Add extra data apkxmldest = check_dir(os.path.join(apkdest, "res", "xml"), create=True) apklibdest = check_dir(os.path.join(apkdest, "libs", abi.name), create=True) - for ver, d in self.extra_packs + [("3.4.2", os.path.join(self.libdest, "lib"))]: + for ver, d in self.extra_packs + [("3.4.3", os.path.join(self.libdest, "lib"))]: r = ET.Element("library", attrib={"version": ver}) log.info("Adding libraries from %s", d) diff --git a/platforms/android/ndk-17.config.py b/platforms/android/ndk-17.config.py new file mode 100644 index 0000000000..9a9b5cc3e8 --- /dev/null +++ b/platforms/android/ndk-17.config.py @@ -0,0 +1,6 @@ +ABIs = [ + ABI("2", "armeabi-v7a", None, 
cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')), + ABI("3", "arm64-v8a", None), + ABI("5", "x86_64", None), + ABI("4", "x86", None), +] diff --git a/platforms/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml index f094417235..6b78d5cfa7 100644 --- a/platforms/android/service/engine/AndroidManifest.xml +++ b/platforms/android/service/engine/AndroidManifest.xml @@ -1,8 +1,8 @@ + android:versionCode="343@ANDROID_PLATFORM_ID@" + android:versionName="3.43"> diff --git a/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java index c2c3950e47..5ecc107197 100644 --- a/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java +++ b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java @@ -137,7 +137,7 @@ public class OpenCVEngineService extends Service { @Override public int getEngineVersion() throws RemoteException { - int version = 3420; + int version = 3430; try { version = getPackageManager().getPackageInfo(getPackageName(), 0).versionCode; } catch (NameNotFoundException e) { diff --git a/platforms/android/service/readme.txt b/platforms/android/service/readme.txt index 45fbe43d9f..144869052d 100644 --- a/platforms/android/service/readme.txt +++ b/platforms/android/service/readme.txt @@ -12,7 +12,7 @@ manually using adb tool: adb install /apk/OpenCV__Manager__.apk -Example: OpenCV_3.4.2-dev_Manager_3.42_armeabi-v7a.apk +Example: OpenCV_3.4.3-dev_Manager_3.43_armeabi-v7a.apk Use the list of platforms below to determine proper OpenCV Manager package for your device: diff --git a/platforms/maven/opencv-it/pom.xml b/platforms/maven/opencv-it/pom.xml index 6722142487..f6d495a75b 100644 --- a/platforms/maven/opencv-it/pom.xml +++ b/platforms/maven/opencv-it/pom.xml @@ -4,7 +4,7 @@ org.opencv opencv-parent - 3.4.2 + 3.4.3 org.opencv opencv-it diff --git a/platforms/maven/opencv/pom.xml b/platforms/maven/opencv/pom.xml index 0599d5b8d2..f0e1e43c6c 100644 --- a/platforms/maven/opencv/pom.xml +++ b/platforms/maven/opencv/pom.xml @@ -4,7 +4,7 @@ org.opencv opencv-parent - 3.4.2 + 3.4.3 org.opencv opencv diff --git a/platforms/maven/pom.xml b/platforms/maven/pom.xml index f3044a8a80..fe4c22db7a 100644 --- a/platforms/maven/pom.xml +++ b/platforms/maven/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.opencv opencv-parent - 3.4.2 + 3.4.3 pom OpenCV Parent POM diff --git a/samples/dnn/mask_rcnn.py b/samples/dnn/mask_rcnn.py new file mode 100644 index 0000000000..cac8d6d1f0 --- /dev/null +++ b/samples/dnn/mask_rcnn.py @@ -0,0 +1,143 @@ +import cv2 as cv +import argparse +import numpy as np + +parser = argparse.ArgumentParser(description= + 'Use this script to run Mask-RCNN object detection and semantic ' + 'segmentation network from TensorFlow Object Detection API.') +parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') +parser.add_argument('--model', required=True, help='Path to a .pb file with weights.') +parser.add_argument('--config', required=True, help='Path to a .pxtxt file contains network configuration.') +parser.add_argument('--classes', help='Optional path to a text file with names of classes.') +parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. 
' + 'An every color is represented with three values from 0 to 255 in BGR channels order.') +parser.add_argument('--width', type=int, default=800, + help='Preprocess input image by resizing to a specific width.') +parser.add_argument('--height', type=int, default=800, + help='Preprocess input image by resizing to a specific height.') +parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') +args = parser.parse_args() + +np.random.seed(324) + +# Load names of classes +classes = None +if args.classes: + with open(args.classes, 'rt') as f: + classes = f.read().rstrip('\n').split('\n') + +# Load colors +colors = None +if args.colors: + with open(args.colors, 'rt') as f: + colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')] + +legend = None +def showLegend(classes): + global legend + if not classes is None and legend is None: + blockHeight = 30 + assert(len(classes) == len(colors)) + + legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8) + for i in range(len(classes)): + block = legend[i * blockHeight:(i + 1) * blockHeight] + block[:,:] = colors[i] + cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) + + cv.namedWindow('Legend', cv.WINDOW_NORMAL) + cv.imshow('Legend', legend) + classes = None + + +def drawBox(frame, classId, conf, left, top, right, bottom): + # Draw a bounding box. + cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0)) + + label = '%.2f' % conf + + # Print a label of class. + if classes: + assert(classId < len(classes)) + label = '%s: %s' % (classes[classId], label) + + labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1) + top = max(top, labelSize[1]) + cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED) + cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0)) + + +# Load a network +net = cv.dnn.readNet(args.model, args.config) +net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) + +winName = 'Mask-RCNN in OpenCV' +cv.namedWindow(winName, cv.WINDOW_NORMAL) + +cap = cv.VideoCapture(args.input if args.input else 0) +legend = None +while cv.waitKey(1) < 0: + hasFrame, frame = cap.read() + if not hasFrame: + cv.waitKey() + break + + frameH = frame.shape[0] + frameW = frame.shape[1] + + # Create a 4D blob from a frame. 
+ blob = cv.dnn.blobFromImage(frame, size=(args.width, args.height), swapRB=True, crop=False) + + # Run a model + net.setInput(blob) + + boxes, masks = net.forward(['detection_out_final', 'detection_masks']) + + numClasses = masks.shape[1] + numDetections = boxes.shape[2] + + # Draw segmentation + if not colors: + # Generate colors + colors = [np.array([0, 0, 0], np.uint8)] + for i in range(1, numClasses + 1): + colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2) + del colors[0] + + boxesToDraw = [] + for i in range(numDetections): + box = boxes[0, 0, i] + mask = masks[i] + score = box[2] + if score > args.thr: + classId = int(box[1]) + left = int(frameW * box[3]) + top = int(frameH * box[4]) + right = int(frameW * box[5]) + bottom = int(frameH * box[6]) + + left = max(0, min(left, frameW - 1)) + top = max(0, min(top, frameH - 1)) + right = max(0, min(right, frameW - 1)) + bottom = max(0, min(bottom, frameH - 1)) + + boxesToDraw.append([frame, classId, score, left, top, right, bottom]) + + classMask = mask[classId] + classMask = cv.resize(classMask, (right - left + 1, bottom - top + 1)) + mask = (classMask > 0.5) + + roi = frame[top:bottom+1, left:right+1][mask] + frame[top:bottom+1, left:right+1][mask] = (0.7 * colors[classId] + 0.3 * roi).astype(np.uint8) + + for box in boxesToDraw: + drawBox(*box) + + # Put efficiency information. + t, _ = net.getPerfProfile() + label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) + cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) + + showLegend(classes) + + cv.imshow(winName, frame) diff --git a/samples/dnn/tf_text_graph_common.py b/samples/dnn/tf_text_graph_common.py index 61e3bbcaee..061718e7a0 100644 --- a/samples/dnn/tf_text_graph_common.py +++ b/samples/dnn/tf_text_graph_common.py @@ -23,3 +23,98 @@ def addConstNode(name, values, graph_def): node.op = 'Const' text_format.Merge(tensorMsg(values), node.attr["value"]) graph_def.node.extend([node]) + + +def addSlice(inp, out, begins, sizes, graph_def): + beginsNode = NodeDef() + beginsNode.name = out + '/begins' + beginsNode.op = 'Const' + text_format.Merge(tensorMsg(begins), beginsNode.attr["value"]) + graph_def.node.extend([beginsNode]) + + sizesNode = NodeDef() + sizesNode.name = out + '/sizes' + sizesNode.op = 'Const' + text_format.Merge(tensorMsg(sizes), sizesNode.attr["value"]) + graph_def.node.extend([sizesNode]) + + sliced = NodeDef() + sliced.name = out + sliced.op = 'Slice' + sliced.input.append(inp) + sliced.input.append(beginsNode.name) + sliced.input.append(sizesNode.name) + graph_def.node.extend([sliced]) + + +def addReshape(inp, out, shape, graph_def): + shapeNode = NodeDef() + shapeNode.name = out + '/shape' + shapeNode.op = 'Const' + text_format.Merge(tensorMsg(shape), shapeNode.attr["value"]) + graph_def.node.extend([shapeNode]) + + reshape = NodeDef() + reshape.name = out + reshape.op = 'Reshape' + reshape.input.append(inp) + reshape.input.append(shapeNode.name) + graph_def.node.extend([reshape]) + + +def addSoftMax(inp, out, graph_def): + softmax = NodeDef() + softmax.name = out + softmax.op = 'Softmax' + text_format.Merge('i: -1', softmax.attr['axis']) + softmax.input.append(inp) + graph_def.node.extend([softmax]) + + +def addFlatten(inp, out, graph_def): + flatten = NodeDef() + flatten.name = out + flatten.op = 'Flatten' + flatten.input.append(inp) + graph_def.node.extend([flatten]) + + +# Removes Identity nodes +def removeIdentity(graph_def): + identities = {} + for node in graph_def.node: + if node.op == 
'Identity': + identities[node.name] = node.input[0] + graph_def.node.remove(node) + + for node in graph_def.node: + for i in range(len(node.input)): + if node.input[i] in identities: + node.input[i] = identities[node.input[i]] + + +def removeUnusedNodesAndAttrs(to_remove, graph_def): + unusedAttrs = ['T', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu', + 'Index', 'Tperm', 'is_training', 'Tpaddings'] + + removedNodes = [] + + for i in reversed(range(len(graph_def.node))): + op = graph_def.node[i].op + name = graph_def.node[i].name + + if op == 'Const' or to_remove(name, op): + if op != 'Const': + removedNodes.append(name) + + del graph_def.node[i] + else: + for attr in unusedAttrs: + if attr in graph_def.node[i].attr: + del graph_def.node[i].attr[attr] + + # Remove references to removed nodes except Const nodes. + for node in graph_def.node: + for i in reversed(range(len(node.input))): + if node.input[i] in removedNodes: + del node.input[i] diff --git a/samples/dnn/tf_text_graph_faster_rcnn.py b/samples/dnn/tf_text_graph_faster_rcnn.py index 9aea38424a..d18d82bfae 100644 --- a/samples/dnn/tf_text_graph_faster_rcnn.py +++ b/samples/dnn/tf_text_graph_faster_rcnn.py @@ -6,7 +6,7 @@ from tensorflow.core.framework.node_def_pb2 import NodeDef from tensorflow.tools.graph_transforms import TransformGraph from google.protobuf import text_format -from tf_text_graph_common import tensorMsg, addConstNode +from tf_text_graph_common import * parser = argparse.ArgumentParser(description='Run this script to get a text graph of ' 'SSD model from TensorFlow Object Detection API. ' @@ -37,50 +37,17 @@ scopesToIgnore = ('FirstStageFeatureExtractor/Assert', 'FirstStageFeatureExtractor/GreaterEqual', 'FirstStageFeatureExtractor/LogicalAnd') -unusedAttrs = ['T', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu', - 'Index', 'Tperm', 'is_training', 'Tpaddings'] - # Read the graph. with tf.gfile.FastGFile(args.input, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) -# Removes Identity nodes -def removeIdentity(): - identities = {} - for node in graph_def.node: - if node.op == 'Identity': - identities[node.name] = node.input[0] - graph_def.node.remove(node) - - for node in graph_def.node: - for i in range(len(node.input)): - if node.input[i] in identities: - node.input[i] = identities[node.input[i]] - -removeIdentity() - -removedNodes = [] - -for i in reversed(range(len(graph_def.node))): - op = graph_def.node[i].op - name = graph_def.node[i].name +removeIdentity(graph_def) - if op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep): - if op != 'Const': - removedNodes.append(name) +def to_remove(name, op): + return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) - del graph_def.node[i] - else: - for attr in unusedAttrs: - if attr in graph_def.node[i].attr: - del graph_def.node[i].attr[attr] - -# Remove references to removed nodes except Const nodes. 
-for node in graph_def.node: - for i in reversed(range(len(node.input))): - if node.input[i] in removedNodes: - del node.input[i] +removeUnusedNodesAndAttrs(to_remove, graph_def) # Connect input node to the first layer @@ -95,68 +62,18 @@ while True: if node.op == 'CropAndResize': break -def addSlice(inp, out, begins, sizes): - beginsNode = NodeDef() - beginsNode.name = out + '/begins' - beginsNode.op = 'Const' - text_format.Merge(tensorMsg(begins), beginsNode.attr["value"]) - graph_def.node.extend([beginsNode]) - - sizesNode = NodeDef() - sizesNode.name = out + '/sizes' - sizesNode.op = 'Const' - text_format.Merge(tensorMsg(sizes), sizesNode.attr["value"]) - graph_def.node.extend([sizesNode]) - - sliced = NodeDef() - sliced.name = out - sliced.op = 'Slice' - sliced.input.append(inp) - sliced.input.append(beginsNode.name) - sliced.input.append(sizesNode.name) - graph_def.node.extend([sliced]) - -def addReshape(inp, out, shape): - shapeNode = NodeDef() - shapeNode.name = out + '/shape' - shapeNode.op = 'Const' - text_format.Merge(tensorMsg(shape), shapeNode.attr["value"]) - graph_def.node.extend([shapeNode]) - - reshape = NodeDef() - reshape.name = out - reshape.op = 'Reshape' - reshape.input.append(inp) - reshape.input.append(shapeNode.name) - graph_def.node.extend([reshape]) - -def addSoftMax(inp, out): - softmax = NodeDef() - softmax.name = out - softmax.op = 'Softmax' - text_format.Merge('i: -1', softmax.attr['axis']) - softmax.input.append(inp) - graph_def.node.extend([softmax]) - -def addFlatten(inp, out): - flatten = NodeDef() - flatten.name = out - flatten.op = 'Flatten' - flatten.input.append(inp) - graph_def.node.extend([flatten]) - addReshape('FirstStageBoxPredictor/ClassPredictor/BiasAdd', - 'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2]) + 'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2], graph_def) addSoftMax('FirstStageBoxPredictor/ClassPredictor/reshape_1', - 'FirstStageBoxPredictor/ClassPredictor/softmax') # Compare with Reshape_4 + 'FirstStageBoxPredictor/ClassPredictor/softmax', graph_def) # Compare with Reshape_4 addFlatten('FirstStageBoxPredictor/ClassPredictor/softmax', - 'FirstStageBoxPredictor/ClassPredictor/softmax/flatten') + 'FirstStageBoxPredictor/ClassPredictor/softmax/flatten', graph_def) # Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd addFlatten('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd', - 'FirstStageBoxPredictor/BoxEncodingPredictor/flatten') + 'FirstStageBoxPredictor/BoxEncodingPredictor/flatten', graph_def) proposals = NodeDef() proposals.name = 'proposals' # Compare with ClipToWindow/Gather/Gather (NOTE: normalized) @@ -218,14 +135,14 @@ graph_def.node.extend([clipByValueNode]) for node in reversed(topNodes): graph_def.node.extend([node]) -addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax') +addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax', graph_def) addSlice('SecondStageBoxPredictor/Reshape_1/softmax', 'SecondStageBoxPredictor/Reshape_1/slice', - [0, 0, 1], [-1, -1, -1]) + [0, 0, 1], [-1, -1, -1], graph_def) addReshape('SecondStageBoxPredictor/Reshape_1/slice', - 'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1]) + 'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def) # Replace Flatten subgraph onto a single node. 
for i in reversed(range(len(graph_def.node))): @@ -255,7 +172,7 @@ for node in graph_def.node: ################################################################################ ### Postprocessing ################################################################################ -addSlice('detection_out/clip_by_value', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4]) +addSlice('detection_out/clip_by_value', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4], graph_def) variance = NodeDef() variance.name = 'proposals/variance' @@ -271,8 +188,8 @@ varianceEncoder.input.append(variance.name) text_format.Merge('i: 2', varianceEncoder.attr["axis"]) graph_def.node.extend([varianceEncoder]) -addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1]) -addFlatten('variance_encoded', 'variance_encoded/flatten') +addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1], graph_def) +addFlatten('variance_encoded', 'variance_encoded/flatten', graph_def) detectionOut = NodeDef() detectionOut.name = 'detection_out_final' diff --git a/samples/dnn/tf_text_graph_mask_rcnn.py b/samples/dnn/tf_text_graph_mask_rcnn.py new file mode 100644 index 0000000000..4c6997f930 --- /dev/null +++ b/samples/dnn/tf_text_graph_mask_rcnn.py @@ -0,0 +1,230 @@ +import argparse +import numpy as np +import tensorflow as tf + +from tensorflow.core.framework.node_def_pb2 import NodeDef +from tensorflow.tools.graph_transforms import TransformGraph +from google.protobuf import text_format + +from tf_text_graph_common import * + +parser = argparse.ArgumentParser(description='Run this script to get a text graph of ' + 'Mask-RCNN model from TensorFlow Object Detection API. ' + 'Then pass it with .pb file to cv::dnn::readNetFromTensorflow function.') +parser.add_argument('--input', required=True, help='Path to frozen TensorFlow graph.') +parser.add_argument('--output', required=True, help='Path to output text graph.') +parser.add_argument('--num_classes', default=90, type=int, help='Number of trained classes.') +parser.add_argument('--scales', default=[0.25, 0.5, 1.0, 2.0], type=float, nargs='+', + help='Hyper-parameter of grid_anchor_generator from a config file.') +parser.add_argument('--aspect_ratios', default=[0.5, 1.0, 2.0], type=float, nargs='+', + help='Hyper-parameter of grid_anchor_generator from a config file.') +parser.add_argument('--features_stride', default=16, type=float, nargs='+', + help='Hyper-parameter from a config file.') +args = parser.parse_args() + +scopesToKeep = ('FirstStageFeatureExtractor', 'Conv', + 'FirstStageBoxPredictor/BoxEncodingPredictor', + 'FirstStageBoxPredictor/ClassPredictor', + 'CropAndResize', + 'MaxPool2D', + 'SecondStageFeatureExtractor', + 'SecondStageBoxPredictor', + 'Preprocessor/sub', + 'Preprocessor/mul', + 'image_tensor') + +scopesToIgnore = ('FirstStageFeatureExtractor/Assert', + 'FirstStageFeatureExtractor/Shape', + 'FirstStageFeatureExtractor/strided_slice', + 'FirstStageFeatureExtractor/GreaterEqual', + 'FirstStageFeatureExtractor/LogicalAnd') + + +# Read the graph. +with tf.gfile.FastGFile(args.input, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + +removeIdentity(graph_def) + +def to_remove(name, op): + return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) + +removeUnusedNodesAndAttrs(to_remove, graph_def) + + +# Connect input node to the first layer +assert(graph_def.node[0].op == 'Placeholder') +graph_def.node[1].input.insert(0, graph_def.node[0].name) + +# Temporarily remove top nodes. 
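As the argument parser's description of this new converter states, the text graph it writes is meant to be loaded together with the frozen .pb weights via cv::dnn::readNetFromTensorflow. A minimal sketch of that hand-off on the Python side; the file names mask_rcnn.pb, mask_rcnn.pbtxt and example.jpg are placeholders, not outputs fixed by this patch:

    import cv2 as cv

    # Placeholder paths: the frozen graph and the text graph produced by
    # tf_text_graph_mask_rcnn.py.
    net = cv.dnn.readNetFromTensorflow('mask_rcnn.pb', 'mask_rcnn.pbtxt')

    img = cv.imread('example.jpg')
    blob = cv.dnn.blobFromImage(img, size=(800, 800), swapRB=True, crop=False)
    net.setInput(blob)

    # As in samples/dnn/mask_rcnn.py above, both outputs are fetched in one call:
    # boxes has shape [1, 1, numDetections, 7], masks [numDetections, numClasses, H, W].
    boxes, masks = net.forward(['detection_out_final', 'detection_masks'])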
+topNodes = [] +numCropAndResize = 0 +while True: + node = graph_def.node.pop() + topNodes.append(node) + if node.op == 'CropAndResize': + numCropAndResize += 1 + if numCropAndResize == 2: + break + +addReshape('FirstStageBoxPredictor/ClassPredictor/BiasAdd', + 'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2], graph_def) + +addSoftMax('FirstStageBoxPredictor/ClassPredictor/reshape_1', + 'FirstStageBoxPredictor/ClassPredictor/softmax', graph_def) # Compare with Reshape_4 + +addFlatten('FirstStageBoxPredictor/ClassPredictor/softmax', + 'FirstStageBoxPredictor/ClassPredictor/softmax/flatten', graph_def) + +# Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd +addFlatten('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd', + 'FirstStageBoxPredictor/BoxEncodingPredictor/flatten', graph_def) + +proposals = NodeDef() +proposals.name = 'proposals' # Compare with ClipToWindow/Gather/Gather (NOTE: normalized) +proposals.op = 'PriorBox' +proposals.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd') +proposals.input.append(graph_def.node[0].name) # image_tensor + +text_format.Merge('b: false', proposals.attr["flip"]) +text_format.Merge('b: true', proposals.attr["clip"]) +text_format.Merge('f: %f' % args.features_stride, proposals.attr["step"]) +text_format.Merge('f: 0.0', proposals.attr["offset"]) +text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), proposals.attr["variance"]) + +widths = [] +heights = [] +for a in args.aspect_ratios: + for s in args.scales: + ar = np.sqrt(a) + heights.append((args.features_stride**2) * s / ar) + widths.append((args.features_stride**2) * s * ar) + +text_format.Merge(tensorMsg(widths), proposals.attr["width"]) +text_format.Merge(tensorMsg(heights), proposals.attr["height"]) + +graph_def.node.extend([proposals]) + +# Compare with Reshape_5 +detectionOut = NodeDef() +detectionOut.name = 'detection_out' +detectionOut.op = 'DetectionOutput' + +detectionOut.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/flatten') +detectionOut.input.append('FirstStageBoxPredictor/ClassPredictor/softmax/flatten') +detectionOut.input.append('proposals') + +text_format.Merge('i: 2', detectionOut.attr['num_classes']) +text_format.Merge('b: true', detectionOut.attr['share_location']) +text_format.Merge('i: 0', detectionOut.attr['background_label_id']) +text_format.Merge('f: 0.7', detectionOut.attr['nms_threshold']) +text_format.Merge('i: 6000', detectionOut.attr['top_k']) +text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type']) +text_format.Merge('i: 100', detectionOut.attr['keep_top_k']) +text_format.Merge('b: true', detectionOut.attr['clip']) + +graph_def.node.extend([detectionOut]) + +# Save as text. +for node in reversed(topNodes): + if node.op != 'CropAndResize': + graph_def.node.extend([node]) + topNodes.pop() + else: + if numCropAndResize == 1: + break + else: + graph_def.node.extend([node]) + topNodes.pop() + numCropAndResize -= 1 + +addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax', graph_def) + +addSlice('SecondStageBoxPredictor/Reshape_1/softmax', + 'SecondStageBoxPredictor/Reshape_1/slice', + [0, 0, 1], [-1, -1, -1], graph_def) + +addReshape('SecondStageBoxPredictor/Reshape_1/slice', + 'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def) + +# Replace Flatten subgraph onto a single node. 
+for i in reversed(range(len(graph_def.node))): + if graph_def.node[i].op == 'CropAndResize': + graph_def.node[i].input.insert(1, 'detection_out') + + if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape': + addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def) + + graph_def.node[i].input.pop() + graph_def.node[i].input.append('SecondStageBoxPredictor/Reshape/shape2') + + if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape', + 'SecondStageBoxPredictor/Flatten/flatten/strided_slice', + 'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape']: + del graph_def.node[i] + +for node in graph_def.node: + if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape': + node.op = 'Flatten' + node.input.pop() + + if node.name in ['FirstStageBoxPredictor/BoxEncodingPredictor/Conv2D', + 'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']: + text_format.Merge('b: true', node.attr["loc_pred_transposed"]) + +################################################################################ +### Postprocessing +################################################################################ +addSlice('detection_out', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4], graph_def) + +variance = NodeDef() +variance.name = 'proposals/variance' +variance.op = 'Const' +text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), variance.attr["value"]) +graph_def.node.extend([variance]) + +varianceEncoder = NodeDef() +varianceEncoder.name = 'variance_encoded' +varianceEncoder.op = 'Mul' +varianceEncoder.input.append('SecondStageBoxPredictor/Reshape') +varianceEncoder.input.append(variance.name) +text_format.Merge('i: 2', varianceEncoder.attr["axis"]) +graph_def.node.extend([varianceEncoder]) + +addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1], graph_def) +addFlatten('variance_encoded', 'variance_encoded/flatten', graph_def) + +detectionOut = NodeDef() +detectionOut.name = 'detection_out_final' +detectionOut.op = 'DetectionOutput' + +detectionOut.input.append('variance_encoded/flatten') +detectionOut.input.append('SecondStageBoxPredictor/Reshape_1/Reshape') +detectionOut.input.append('detection_out/slice/reshape') + +text_format.Merge('i: %d' % args.num_classes, detectionOut.attr['num_classes']) +text_format.Merge('b: false', detectionOut.attr['share_location']) +text_format.Merge('i: %d' % (args.num_classes + 1), detectionOut.attr['background_label_id']) +text_format.Merge('f: 0.6', detectionOut.attr['nms_threshold']) +text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type']) +text_format.Merge('i: 100', detectionOut.attr['keep_top_k']) +text_format.Merge('b: true', detectionOut.attr['clip']) +text_format.Merge('b: true', detectionOut.attr['variance_encoded_in_target']) +text_format.Merge('f: 0.3', detectionOut.attr['confidence_threshold']) +text_format.Merge('b: false', detectionOut.attr['group_by_classes']) +graph_def.node.extend([detectionOut]) + +for node in reversed(topNodes): + graph_def.node.extend([node]) + +for i in reversed(range(len(graph_def.node))): + if graph_def.node[i].op == 'CropAndResize': + graph_def.node[i].input.insert(1, 'detection_out_final') + break + +graph_def.node[-1].name = 'detection_masks' +graph_def.node[-1].op = 'Sigmoid' +graph_def.node[-1].input.pop() + +tf.train.write_graph(graph_def, "", args.output, as_text=True) diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index 573a6d8941..0d4a41f34a 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ 
b/samples/dnn/tf_text_graph_ssd.py @@ -15,7 +15,7 @@ from math import sqrt from tensorflow.core.framework.node_def_pb2 import NodeDef from tensorflow.tools.graph_transforms import TransformGraph from google.protobuf import text_format -from tf_text_graph_common import tensorMsg, addConstNode +from tf_text_graph_common import * parser = argparse.ArgumentParser(description='Run this script to get a text graph of ' 'SSD model from TensorFlow Object Detection API. ' @@ -41,10 +41,6 @@ args = parser.parse_args() keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu6', 'Placeholder', 'FusedBatchNorm', 'DepthwiseConv2dNative', 'ConcatV2', 'Mul', 'MaxPool', 'AvgPool', 'Identity'] -# Nodes attributes that could be removed because they are not used during import. -unusedAttrs = ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu', - 'Index', 'Tperm', 'is_training', 'Tpaddings'] - # Node with which prefixes should be removed prefixesToRemove = ('MultipleGridAnchorGenerator/', 'Postprocessor/', 'Preprocessor/') @@ -66,7 +62,6 @@ def getUnconnectedNodes(): unconnected.remove(inp) return unconnected -removedNodes = [] # Detect unfused batch normalization nodes and fuse them. def fuse_batch_normalization(): @@ -118,41 +113,13 @@ def fuse_batch_normalization(): fuse_batch_normalization() -# Removes Identity nodes -def removeIdentity(): - identities = {} - for node in graph_def.node: - if node.op == 'Identity': - identities[node.name] = node.input[0] - graph_def.node.remove(node) - - for node in graph_def.node: - for i in range(len(node.input)): - if node.input[i] in identities: - node.input[i] = identities[node.input[i]] - -removeIdentity() - -# Remove extra nodes and attributes. -for i in reversed(range(len(graph_def.node))): - op = graph_def.node[i].op - name = graph_def.node[i].name +removeIdentity(graph_def) - if (not op in keepOps) or name.startswith(prefixesToRemove): - if op != 'Const': - removedNodes.append(name) +def to_remove(name, op): + return (not op in keepOps) or name.startswith(prefixesToRemove) - del graph_def.node[i] - else: - for attr in unusedAttrs: - if attr in graph_def.node[i].attr: - del graph_def.node[i].attr[attr] +removeUnusedNodesAndAttrs(to_remove, graph_def) -# Remove references to removed nodes except Const nodes. -for node in graph_def.node: - for i in reversed(range(len(node.input))): - if node.input[i] in removedNodes: - del node.input[i] # Connect input node to the first layer assert(graph_def.node[0].op == 'Placeholder') @@ -175,8 +142,8 @@ def addConcatNode(name, inputs, axisNodeName): concat.input.append(axisNodeName) graph_def.node.extend([concat]) -addConstNode('concat/axis_flatten', [-1]) -addConstNode('PriorBox/concat/axis', [-2]) +addConstNode('concat/axis_flatten', [-1], graph_def) +addConstNode('PriorBox/concat/axis', [-2], graph_def) for label in ['ClassPredictor', 'BoxEncodingPredictor' if args.box_predictor is 'convolutional' else 'BoxPredictor']: concatInputs = []
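The hunks above converge on one pattern: each converter script now delegates graph clean-up to the shared helpers in tf_text_graph_common.py instead of carrying its own copies, and every node-building helper takes the target graph explicitly. A rough sketch of that call pattern, using illustrative scope tuples and a placeholder input path rather than values from any particular model:

    import tensorflow as tf
    from tf_text_graph_common import removeIdentity, removeUnusedNodesAndAttrs, addConstNode

    # Illustrative scope lists; every converter defines its own.
    scopesToKeep = ('FeatureExtractor', 'BoxPredictor', 'image_tensor')
    scopesToIgnore = ('FeatureExtractor/Assert',)

    # Placeholder path to a frozen TensorFlow graph.
    with tf.gfile.FastGFile('frozen_inference_graph.pb', 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Drop Identity nodes, then prune everything the predicate rejects and
    # strip attributes the OpenCV importer does not use.
    removeIdentity(graph_def)

    def to_remove(name, op):
        return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)

    removeUnusedNodesAndAttrs(to_remove, graph_def)

    # Node-building helpers now receive the target graph explicitly.
    addConstNode('concat/axis_flatten', [-1], graph_def)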