Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/12339/head
Alexander Alekhin 7 years ago
commit c1db75e0c7
  1. CMakeLists.txt | 2
  2. cmake/OpenCVDetectInferenceEngine.cmake | 4
  3. cmake/OpenCVFindVA.cmake | 4
  4. doc/tutorials/dnn/dnn_android/dnn_android.markdown | 2
  5. doc/tutorials/introduction/cross_referencing/tutorial_cross_referencing.markdown | 4
  6. modules/core/misc/java/src/java/core+MatOfRotatedRect.java | 86
  7. modules/core/misc/java/test/RotatedRectTest.java | 22
  8. modules/core/src/va_intel.cpp | 175
  9. modules/core/test/test_arithm.cpp | 67
  10. modules/cudaarithm/src/cuda/math.cu | 31
  11. modules/dnn/include/opencv2/dnn/dnn.hpp | 6
  12. modules/dnn/src/dnn.cpp | 9
  13. modules/dnn/src/layers/convolution_layer.cpp | 7
  14. modules/dnn/src/layers/crop_and_resize_layer.cpp | 7
  15. modules/dnn/src/layers/detection_output_layer.cpp | 50
  16. modules/dnn/src/layers/elementwise_layers.cpp | 7
  17. modules/dnn/src/layers/lrn_layer.cpp | 4
  18. modules/dnn/src/layers/resize_layer.cpp | 16
  19. modules/dnn/src/op_inf_engine.hpp | 2
  20. modules/dnn/src/tensorflow/tf_importer.cpp | 86
  21. modules/dnn/test/test_backends.cpp | 24
  22. modules/dnn/test/test_caffe_importer.cpp | 29
  23. modules/dnn/test/test_ie_models.cpp | 3
  24. modules/dnn/test/test_layers.cpp | 8
  25. modules/dnn/test/test_tf_importer.cpp | 79
  26. modules/dnn/test/test_torch_importer.cpp | 44
  27. modules/python/src2/cv2.cpp | 13
  28. modules/shape/src/aff_trans.cpp | 4
  29. modules/ts/src/ts.cpp | 8
  30. modules/videoio/src/cap_xine.cpp | 12
  31. modules/viz/src/widget.cpp | 1
  32. platforms/android/build_sdk.py | 8
  33. platforms/android/ndk-17.config.py | 6
  34. platforms/android/service/engine/AndroidManifest.xml | 4
  35. platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java | 2
  36. platforms/android/service/readme.txt | 2
  37. platforms/maven/opencv-it/pom.xml | 2
  38. platforms/maven/opencv/pom.xml | 2
  39. platforms/maven/pom.xml | 2
  40. samples/dnn/mask_rcnn.py | 143
  41. samples/dnn/tf_text_graph_common.py | 95
  42. samples/dnn/tf_text_graph_faster_rcnn.py | 113
  43. samples/dnn/tf_text_graph_mask_rcnn.py | 230
  44. samples/dnn/tf_text_graph_ssd.py | 47

@ -275,7 +275,7 @@ OCV_OPTION(WITH_VA "Include VA support" OFF
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF IF ((UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW)) )
OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" ON IF (UNIX AND NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" OFF IF (UNIX AND NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_LAPACK "Include Lapack library support" (NOT CV_DISABLE_OPTIMIZATION) IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_ITT "Include Intel ITT support" ON IF (NOT APPLE_FRAMEWORK) )
OCV_OPTION(WITH_PROTOBUF "Enable libprotobuf" ON )

@ -78,9 +78,9 @@ endif()
if(INF_ENGINE_TARGET)
if(NOT INF_ENGINE_RELEASE)
message(WARNING "InferenceEngine version have not been set, 2018R2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
message(WARNING "InferenceEngine version have not been set, 2018R3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
endif()
set(INF_ENGINE_RELEASE "2018020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
set(INF_ENGINE_RELEASE "2018030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
)

@ -12,7 +12,9 @@ endif()
if(VA_INCLUDE_DIR)
set(HAVE_VA TRUE)
set(VA_LIBRARIES "-lva" "-lva-drm")
if(NOT DEFINED VA_LIBRARIES)
set(VA_LIBRARIES "va" "va-drm")
endif()
else()
set(HAVE_VA FALSE)
message(WARNING "libva installation is not found.")

@ -12,7 +12,7 @@ Tutorial was written for the following versions of corresponding software:
- Download and install Android Studio from https://developer.android.com/studio.
- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.2-android-sdk.zip`).
- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-3.4.3-android-sdk.zip`).
- Download MobileNet object detection model from https://github.com/chuanqi305/MobileNet-SSD. We need a configuration file `MobileNetSSD_deploy.prototxt` and weights `MobileNetSSD_deploy.caffemodel`.

@ -36,14 +36,14 @@ Open your Doxyfile using your favorite text editor and search for the key
`TAGFILES`. Change it as follows:
@code
TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2
TAGFILES = ./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.3
@endcode
If you had other definitions already, you can append the line using a `\`:
@code
TAGFILES = ./docs/doxygen-tags/libstdc++.tag=https://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen \
./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.2
./docs/doxygen-tags/opencv.tag=http://docs.opencv.org/3.4.3
@endcode
Doxygen can now use the information from the tag file to link to the OpenCV

@ -0,0 +1,86 @@
package org.opencv.core;
import java.util.Arrays;
import java.util.List;
import org.opencv.core.RotatedRect;
public class MatOfRotatedRect extends Mat {
// 32FC5
private static final int _depth = CvType.CV_32F;
private static final int _channels = 5;
public MatOfRotatedRect() {
super();
}
protected MatOfRotatedRect(long addr) {
super(addr);
if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incompatible Mat");
//FIXME: do we need release() here?
}
public static MatOfRotatedRect fromNativeAddr(long addr) {
return new MatOfRotatedRect(addr);
}
public MatOfRotatedRect(Mat m) {
super(m, Range.all());
if( !empty() && checkVector(_channels, _depth) < 0 )
throw new IllegalArgumentException("Incompatible Mat");
//FIXME: do we need release() here?
}
public MatOfRotatedRect(RotatedRect...a) {
super();
fromArray(a);
}
public void alloc(int elemNumber) {
if(elemNumber>0)
super.create(elemNumber, 1, CvType.makeType(_depth, _channels));
}
public void fromArray(RotatedRect...a) {
if(a==null || a.length==0)
return;
int num = a.length;
alloc(num);
float buff[] = new float[num * _channels];
for(int i=0; i<num; i++) {
RotatedRect r = a[i];
buff[_channels*i+0] = (float) r.center.x;
buff[_channels*i+1] = (float) r.center.y;
buff[_channels*i+2] = (float) r.size.width;
buff[_channels*i+3] = (float) r.size.height;
buff[_channels*i+4] = (float) r.angle;
}
put(0, 0, buff); //TODO: check ret val!
}
public RotatedRect[] toArray() {
int num = (int) total();
RotatedRect[] a = new RotatedRect[num];
if(num == 0)
return a;
float buff[] = new float[_channels];
for(int i=0; i<num; i++) {
get(i, 0, buff); //TODO: check ret val!
a[i] = new RotatedRect(new Point(buff[0],buff[1]),new Size(buff[2],buff[3]),buff[4]);
}
return a;
}
public void fromList(List<RotatedRect> lr) {
RotatedRect ap[] = lr.toArray(new RotatedRect[0]);
fromArray(ap);
}
public List<RotatedRect> toList() {
RotatedRect[] ar = toArray();
return Arrays.asList(ar);
}
}

@ -1,11 +1,16 @@
package org.opencv.test.core;
import org.opencv.core.CvType;
import org.opencv.core.Point;
import org.opencv.core.Rect;
import org.opencv.core.RotatedRect;
import org.opencv.core.MatOfRotatedRect;
import org.opencv.core.Size;
import org.opencv.test.OpenCVTestCase;
import java.util.Arrays;
import java.util.List;
public class RotatedRectTest extends OpenCVTestCase {
private double angle;
@ -188,4 +193,21 @@ public class RotatedRectTest extends OpenCVTestCase {
assertEquals(expected, actual);
}
public void testMatOfRotatedRect() {
RotatedRect a = new RotatedRect(new Point(1,2),new Size(3,4),5.678);
RotatedRect b = new RotatedRect(new Point(9,8),new Size(7,6),5.432);
MatOfRotatedRect m = new MatOfRotatedRect(a,b,a,b,a,b,a,b);
assertEquals(m.rows(), 8);
assertEquals(m.cols(), 1);
assertEquals(m.type(), CvType.CV_32FC(5));
RotatedRect[] arr = m.toArray();
assertEquals(arr[2].angle, a.angle, EPS);
assertEquals(arr[3].center.x, b.center.x);
assertEquals(arr[3].size.width, b.size.width);
List<RotatedRect> li = m.toList();
assertEquals(li.size(), 8);
RotatedRect rr = li.get(7);
assertEquals(rr.angle, b.angle, EPS);
assertEquals(rr.center.y, b.center.y);
}
}

@ -324,6 +324,163 @@ static void copy_convert_bgr_to_nv12(const VAImage& image, const Mat& bgr, unsig
dstUV += dstStepUV;
}
}
static void copy_convert_yv12_to_bgr(const VAImage& image, const unsigned char* buffer, Mat& bgr)
{
const float d1 = 16.0f;
const float d2 = 128.0f;
static const float coeffs[5] =
{
1.163999557f,
2.017999649f,
-0.390999794f,
-0.812999725f,
1.5959997177f
};
CV_CheckEQ(image.format.fourcc, VA_FOURCC_YV12, "Unexpected image format");
CV_CheckEQ(image.num_planes, 3, "");
const size_t srcOffsetY = image.offsets[0];
const size_t srcOffsetV = image.offsets[1];
const size_t srcOffsetU = image.offsets[2];
const size_t srcStepY = image.pitches[0];
const size_t srcStepU = image.pitches[1];
const size_t srcStepV = image.pitches[2];
const size_t dstStep = bgr.step;
const unsigned char* srcY_ = buffer + srcOffsetY;
const unsigned char* srcV_ = buffer + srcOffsetV;
const unsigned char* srcU_ = buffer + srcOffsetU;
for (int y = 0; y < bgr.rows; y += 2)
{
const unsigned char* srcY0 = srcY_ + (srcStepY) * y;
const unsigned char* srcY1 = srcY0 + srcStepY;
const unsigned char* srcV = srcV_ + (srcStepV) * y / 2;
const unsigned char* srcU = srcU_ + (srcStepU) * y / 2;
unsigned char* dst0 = bgr.data + (dstStep) * y;
unsigned char* dst1 = dst0 + dstStep;
for (int x = 0; x < bgr.cols; x += 2)
{
float Y0 = float(srcY0[x+0]);
float Y1 = float(srcY0[x+1]);
float Y2 = float(srcY1[x+0]);
float Y3 = float(srcY1[x+1]);
float U = float(srcU[x/2]) - d2;
float V = float(srcV[x/2]) - d2;
Y0 = std::max(0.0f, Y0 - d1) * coeffs[0];
Y1 = std::max(0.0f, Y1 - d1) * coeffs[0];
Y2 = std::max(0.0f, Y2 - d1) * coeffs[0];
Y3 = std::max(0.0f, Y3 - d1) * coeffs[0];
float ruv = coeffs[4]*V;
float guv = coeffs[3]*V + coeffs[2]*U;
float buv = coeffs[1]*U;
dst0[(x+0)*NCHANNELS+0] = saturate_cast<unsigned char>(Y0 + buv);
dst0[(x+0)*NCHANNELS+1] = saturate_cast<unsigned char>(Y0 + guv);
dst0[(x+0)*NCHANNELS+2] = saturate_cast<unsigned char>(Y0 + ruv);
dst0[(x+1)*NCHANNELS+0] = saturate_cast<unsigned char>(Y1 + buv);
dst0[(x+1)*NCHANNELS+1] = saturate_cast<unsigned char>(Y1 + guv);
dst0[(x+1)*NCHANNELS+2] = saturate_cast<unsigned char>(Y1 + ruv);
dst1[(x+0)*NCHANNELS+0] = saturate_cast<unsigned char>(Y2 + buv);
dst1[(x+0)*NCHANNELS+1] = saturate_cast<unsigned char>(Y2 + guv);
dst1[(x+0)*NCHANNELS+2] = saturate_cast<unsigned char>(Y2 + ruv);
dst1[(x+1)*NCHANNELS+0] = saturate_cast<unsigned char>(Y3 + buv);
dst1[(x+1)*NCHANNELS+1] = saturate_cast<unsigned char>(Y3 + guv);
dst1[(x+1)*NCHANNELS+2] = saturate_cast<unsigned char>(Y3 + ruv);
}
}
}
static void copy_convert_bgr_to_yv12(const VAImage& image, const Mat& bgr, unsigned char* buffer)
{
const float d1 = 16.0f;
const float d2 = 128.0f;
static const float coeffs[8] =
{
0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f,
-0.2909994125f, 0.438999176f, -0.3679990768f, -0.0709991455f
};
CV_CheckEQ(image.format.fourcc, VA_FOURCC_YV12, "Unexpected image format");
CV_CheckEQ(image.num_planes, 3, "");
const size_t dstOffsetY = image.offsets[0];
const size_t dstOffsetV = image.offsets[1];
const size_t dstOffsetU = image.offsets[2];
const size_t dstStepY = image.pitches[0];
const size_t dstStepU = image.pitches[1];
const size_t dstStepV = image.pitches[2];
unsigned char* dstY_ = buffer + dstOffsetY;
unsigned char* dstV_ = buffer + dstOffsetV;
unsigned char* dstU_ = buffer + dstOffsetU;
const size_t srcStep = bgr.step;
for (int y = 0; y < bgr.rows; y += 2)
{
unsigned char* dstY0 = dstY_ + (dstStepY) * y;
unsigned char* dstY1 = dstY0 + dstStepY;
unsigned char* dstV = dstV_ + (dstStepV) * y / 2;
unsigned char* dstU = dstU_ + (dstStepU) * y / 2;
const unsigned char* src0 = bgr.data + (srcStep) * y;
const unsigned char* src1 = src0 + srcStep;
for (int x = 0; x < bgr.cols; x += 2)
{
float B0 = float(src0[(x+0)*NCHANNELS+0]);
float G0 = float(src0[(x+0)*NCHANNELS+1]);
float R0 = float(src0[(x+0)*NCHANNELS+2]);
float B1 = float(src0[(x+1)*NCHANNELS+0]);
float G1 = float(src0[(x+1)*NCHANNELS+1]);
float R1 = float(src0[(x+1)*NCHANNELS+2]);
float B2 = float(src1[(x+0)*NCHANNELS+0]);
float G2 = float(src1[(x+0)*NCHANNELS+1]);
float R2 = float(src1[(x+0)*NCHANNELS+2]);
float B3 = float(src1[(x+1)*NCHANNELS+0]);
float G3 = float(src1[(x+1)*NCHANNELS+1]);
float R3 = float(src1[(x+1)*NCHANNELS+2]);
float Y0 = coeffs[0]*R0 + coeffs[1]*G0 + coeffs[2]*B0 + d1;
float Y1 = coeffs[0]*R1 + coeffs[1]*G1 + coeffs[2]*B1 + d1;
float Y2 = coeffs[0]*R2 + coeffs[1]*G2 + coeffs[2]*B2 + d1;
float Y3 = coeffs[0]*R3 + coeffs[1]*G3 + coeffs[2]*B3 + d1;
float U = coeffs[3]*R0 + coeffs[4]*G0 + coeffs[5]*B0 + d2;
float V = coeffs[5]*R0 + coeffs[6]*G0 + coeffs[7]*B0 + d2;
dstY0[x+0] = saturate_cast<unsigned char>(Y0);
dstY0[x+1] = saturate_cast<unsigned char>(Y1);
dstY1[x+0] = saturate_cast<unsigned char>(Y2);
dstY1[x+1] = saturate_cast<unsigned char>(Y3);
dstU[x/2] = saturate_cast<unsigned char>(U);
dstV[x/2] = saturate_cast<unsigned char>(V);
}
}
}
#endif // HAVE_VA
void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, Size size)
@ -412,9 +569,12 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface,
if (status != VA_STATUS_SUCCESS)
CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");
CV_Assert(image.format.fourcc == VA_FOURCC_NV12);
copy_convert_bgr_to_nv12(image, m, buffer);
if (image.format.fourcc == VA_FOURCC_NV12)
copy_convert_bgr_to_nv12(image, m, buffer);
if (image.format.fourcc == VA_FOURCC_YV12)
copy_convert_bgr_to_yv12(image, m, buffer);
else
CV_Check((int)image.format.fourcc, image.format.fourcc == VA_FOURCC_NV12 || image.format.fourcc == VA_FOURCC_YV12, "Unexpected image format");
status = vaUnmapBuffer(display, image.buf);
if (status != VA_STATUS_SUCCESS)
@ -510,9 +670,12 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out
if (status != VA_STATUS_SUCCESS)
CV_Error(cv::Error::StsError, "VA-API: vaMapBuffer failed");
CV_Assert(image.format.fourcc == VA_FOURCC_NV12);
copy_convert_nv12_to_bgr(image, buffer, m);
if (image.format.fourcc == VA_FOURCC_NV12)
copy_convert_nv12_to_bgr(image, buffer, m);
if (image.format.fourcc == VA_FOURCC_YV12)
copy_convert_yv12_to_bgr(image, buffer, m);
else
CV_Check((int)image.format.fourcc, image.format.fourcc == VA_FOURCC_NV12 || image.format.fourcc == VA_FOURCC_YV12, "Unexpected image format");
status = vaUnmapBuffer(display, image.buf);
if (status != VA_STATUS_SUCCESS)

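The new copy_convert_yv12_to_bgr / copy_convert_bgr_to_yv12 helpers walk the three YV12 planes (full-resolution Y first, then the quarter-resolution V and U planes, note the V-before-U offsets) and apply video-range BT.601 coefficients. A rough equivalent of the same full-frame conversion using OpenCV's existing cvtColor codes, purely for illustration (this is not the VA-API surface-mapping path above):

    import cv2 as cv
    import numpy as np

    h, w = 4, 4  # 4:2:0 subsampling needs even dimensions
    bgr = np.random.randint(0, 256, (h, w, 3), np.uint8)

    # YV12 stacks the Y plane (h x w) above the quarter-size V and U planes.
    yv12 = cv.cvtColor(bgr, cv.COLOR_BGR2YUV_YV12)      # shape (h * 3 // 2, w)
    restored = cv.cvtColor(yv12, cv.COLOR_YUV2BGR_YV12)
    print(yv12.shape, restored.shape)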
@ -2158,4 +2158,71 @@ TEST(Core_Norm, IPP_regression_NORM_L1_16UC3_small)
EXPECT_EQ((double)20*cn, cv::norm(a, b, NORM_L1, mask));
}
TEST(Core_ConvertTo, regression_12121)
{
{
Mat src(4, 64, CV_32SC1, Scalar(-1));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32767));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32768));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(0, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(32768));
Mat dst;
src.convertTo(dst, CV_8U);
EXPECT_EQ(255, dst.at<uchar>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(0, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32767));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(0, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(INT_MIN + 32768));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(0, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
{
Mat src(4, 64, CV_32SC1, Scalar(65536));
Mat dst;
src.convertTo(dst, CV_16U);
EXPECT_EQ(65535, dst.at<ushort>(0, 0)) << "src=" << src.at<int>(0, 0);
}
}
}} // namespace

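These regression_12121 cases pin down the saturating behaviour of Mat::convertTo for CV_32S inputs: out-of-range values clamp to the destination limits (0 and 255 for CV_8U, 0 and 65535 for CV_16U) instead of wrapping. A small NumPy sketch of the expected clamping, assuming a plain clip-then-cast model of saturate_cast (not the library's internal vectorized path):

    import numpy as np

    def saturate_to(values, dtype):
        # Emulate saturate_cast: clip to the destination range, then cast.
        info = np.iinfo(dtype)
        return np.clip(values, info.min, info.max).astype(dtype)

    src = np.array([-1, np.iinfo(np.int32).min, 32768, 65536], dtype=np.int64)
    print(saturate_to(src, np.uint8))    # [  0   0 255 255]
    print(saturate_to(src, np.uint16))   # [    0     0 32768 65535]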
@ -278,20 +278,12 @@ namespace
{
template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
{
float power;
typedef typename LargerType<T, float>::type LargerType;
LargerType power;
__device__ __forceinline__ T operator()(T e) const
{
return cudev::saturate_cast<T>(__powf((float)e, power));
}
};
template<typename T> struct PowOp<T, true> : unary_function<T, T>
{
float power;
__device__ __forceinline__ T operator()(T e) const
{
T res = cudev::saturate_cast<T>(__powf((float)e, power));
T res = cudev::saturate_cast<T>(__powf(e < 0 ? -e : e, power));
if ((e < 0) && (1 & static_cast<int>(power)))
res *= -1;
@ -299,22 +291,15 @@ namespace
return res;
}
};
template<> struct PowOp<float> : unary_function<float, float>
{
float power;
__device__ __forceinline__ float operator()(float e) const
{
return __powf(::fabs(e), power);
}
};
template<> struct PowOp<double> : unary_function<double, double>
template<typename T> struct PowOp<T, false> : unary_function<T, T>
{
double power;
typedef typename LargerType<T, float>::type LargerType;
LargerType power;
__device__ __forceinline__ double operator()(double e) const
__device__ __forceinline__ T operator()(T e) const
{
return ::pow(::fabs(e), power);
return cudev::saturate_cast<T>(__powf(e, power));
}
};

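The reworked PowOp keeps cv::pow semantics for signed integer element types on CUDA: the power is applied to |e| and the sign is restored only when the integer exponent is odd, so (-2)^3 gives -8 while (-2)^2 gives 4. A tiny Python sketch of that sign rule (illustrative only, not the CUDA kernel):

    def int_pow(e, power):
        # Apply the power to the magnitude, then restore the sign for odd exponents.
        res = round(abs(e) ** power)
        if e < 0 and int(power) % 2 == 1:
            res = -res
        return res

    print(int_pow(-2, 3), int_pow(-2, 2))  # -8 4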
@ -46,9 +46,9 @@
#include <opencv2/core.hpp>
#if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v6 {
#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_34_v7 {
#define CV__DNN_EXPERIMENTAL_NS_END }
namespace cv { namespace dnn { namespace experimental_dnn_v6 { } using namespace experimental_dnn_v6; }}
namespace cv { namespace dnn { namespace experimental_dnn_34_v7 { } using namespace experimental_dnn_34_v7; }}
#else
#define CV__DNN_EXPERIMENTAL_NS_BEGIN
#define CV__DNN_EXPERIMENTAL_NS_END
@ -900,7 +900,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
CV_OUT std::vector<int>& indices,
const float eta = 1.f, const int top_k = 0);
CV_EXPORTS void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
CV_EXPORTS_AS(NMSBoxesRotated) void NMSBoxes(const std::vector<RotatedRect>& bboxes, const std::vector<float>& scores,
const float score_threshold, const float nms_threshold,
CV_OUT std::vector<int>& indices,
const float eta = 1.f, const int top_k = 0);

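CV_EXPORTS_AS(NMSBoxesRotated) gives the RotatedRect overload of NMSBoxes its own name in the generated bindings (the matching vector_RotatedRect converter is added to modules/python/src2/cv2.cpp later in this commit), so from Python it should be reachable as cv.dnn.NMSBoxesRotated with boxes passed as ((cx, cy), (w, h), angle) tuples. A hedged usage sketch with made-up boxes:

    import cv2 as cv

    # Two heavily overlapping rotated boxes plus one separate box.
    boxes = [((50, 50), (40, 20), 15.0),
             ((52, 51), (40, 20), 17.0),
             ((150, 150), (30, 30), 0.0)]
    scores = [0.9, 0.8, 0.7]

    indices = cv.dnn.NMSBoxesRotated(boxes, scores, 0.5, 0.4)
    print(indices)  # expected to keep boxes 0 and 2, suppressing box 1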
@ -699,9 +699,9 @@ public:
}
}
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool forceCreate, bool use_half)
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half)
{
if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS && !forceCreate)
if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS)
{
Mat bestBlob;
LayerPin bestBlobPin;
@ -747,7 +747,7 @@ public:
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
std::vector<LayerPin>& pinsForInternalBlobs,
bool forceCreate = false, bool use_half = false)
bool use_half = false)
{
CV_TRACE_FUNCTION();
@ -818,7 +818,7 @@ public:
reuse(ld.inputBlobsId[0], blobPin);
}
else
reuseOrCreate(shapes[index], blobPin, *blobs[index], forceCreate, use_half);
reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half);
}
}
}
@ -1607,7 +1607,6 @@ struct Net::Impl
std::vector<LayerPin> pinsForInternalBlobs;
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs,
preferableBackend == DNN_BACKEND_INFERENCE_ENGINE,
preferableBackend == DNN_BACKEND_OPENCV &&
preferableTarget == DNN_TARGET_OPENCL_FP16);
ld.outputBlobsWrappers.resize(ld.outputBlobs.size());

@ -81,6 +81,7 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
if (type == "Convolution")
@ -91,13 +92,19 @@ public:
const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
const int group = numOutput / outGroupCn;
if (group != 1)
{
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R3)
return preferableTarget == DNN_TARGET_CPU;
#endif
return false;
}
if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16)
return dilation.width == 1 && dilation.height == 1;
return true;
}
}
else
#endif // HAVE_INF_ENGINE
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
}

@ -99,6 +99,13 @@ public:
}
}
}
if (boxes.rows < out.size[0])
{
// left = top = right = bottom = 0
std::vector<cv::Range> dstRanges(4, Range::all());
dstRanges[0] = Range(boxes.rows, out.size[0]);
out(dstRanges).setTo(inp.ptr<float>(0, 0, 0)[0]);
}
}
private:

@ -115,6 +115,7 @@ public:
// It's true whenever predicted bounding boxes and proposals are normalized to [0, 1].
bool _bboxesNormalized;
bool _clip;
bool _groupByClasses;
enum { _numAxes = 4 };
static const std::string _layerName;
@ -183,6 +184,7 @@ public:
_locPredTransposed = getParameter<bool>(params, "loc_pred_transposed", 0, false, false);
_bboxesNormalized = getParameter<bool>(params, "normalized_bbox", 0, false, true);
_clip = getParameter<bool>(params, "clip", 0, false, false);
_groupByClasses = getParameter<bool>(params, "group_by_classes", 0, false, true);
getCodeType(params);
@ -381,7 +383,7 @@ public:
{
count += outputDetections_(i, &outputsData[count * 7],
allDecodedBBoxes[i], allConfidenceScores[i],
allIndices[i]);
allIndices[i], _groupByClasses);
}
CV_Assert(count == numKept);
}
@ -497,7 +499,7 @@ public:
{
count += outputDetections_(i, &outputsData[count * 7],
allDecodedBBoxes[i], allConfidenceScores[i],
allIndices[i]);
allIndices[i], _groupByClasses);
}
CV_Assert(count == numKept);
}
@ -505,9 +507,36 @@ public:
size_t outputDetections_(
const int i, float* outputsData,
const LabelBBox& decodeBBoxes, Mat& confidenceScores,
const std::map<int, std::vector<int> >& indicesMap
const std::map<int, std::vector<int> >& indicesMap,
bool groupByClasses
)
{
std::vector<int> dstIndices;
std::vector<std::pair<float, int> > allScores;
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
{
int label = it->first;
if (confidenceScores.rows <= label)
CV_Error_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
const std::vector<float>& scores = confidenceScores.row(label);
const std::vector<int>& indices = it->second;
const int numAllScores = allScores.size();
allScores.reserve(numAllScores + indices.size());
for (size_t j = 0; j < indices.size(); ++j)
{
allScores.push_back(std::make_pair(scores[indices[j]], numAllScores + j));
}
}
if (!groupByClasses)
std::sort(allScores.begin(), allScores.end(), util::SortScorePairDescend<int>);
dstIndices.resize(allScores.size());
for (size_t j = 0; j < dstIndices.size(); ++j)
{
dstIndices[allScores[j].second] = j;
}
size_t count = 0;
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
{
@ -524,14 +553,15 @@ public:
for (size_t j = 0; j < indices.size(); ++j, ++count)
{
int idx = indices[j];
int dstIdx = dstIndices[count];
const util::NormalizedBBox& decode_bbox = label_bboxes->second[idx];
outputsData[count * 7] = i;
outputsData[count * 7 + 1] = label;
outputsData[count * 7 + 2] = scores[idx];
outputsData[count * 7 + 3] = decode_bbox.xmin;
outputsData[count * 7 + 4] = decode_bbox.ymin;
outputsData[count * 7 + 5] = decode_bbox.xmax;
outputsData[count * 7 + 6] = decode_bbox.ymax;
outputsData[dstIdx * 7] = i;
outputsData[dstIdx * 7 + 1] = label;
outputsData[dstIdx * 7 + 2] = scores[idx];
outputsData[dstIdx * 7 + 3] = decode_bbox.xmin;
outputsData[dstIdx * 7 + 4] = decode_bbox.ymin;
outputsData[dstIdx * 7 + 5] = decode_bbox.xmax;
outputsData[dstIdx * 7 + 6] = decode_bbox.ymax;
}
}
return count;

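The new group_by_classes flag only changes the order in which kept detections are written out: with group_by_classes=false the per-class index lists are flattened, sorted by confidence across all classes, and dstIndices maps each detection's original class-grouped position to its rank in that score-sorted output. A small sketch of the index mapping with made-up scores:

    # Confidence scores in the original class-grouped order.
    scores = [0.9, 0.2, 0.8, 0.5]

    # Rank of each detection when sorted by descending score.
    order = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
    dst_indices = [0] * len(scores)
    for rank, src in enumerate(order):
        dst_indices[src] = rank

    print(dst_indices)  # [0, 3, 1, 2]: detection i is written to output slot dst_indices[i]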
@ -599,7 +599,8 @@ struct ELUFunctor
bool supportBackend(int backendId, int)
{
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE;
return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE ||
backendId == DNN_BACKEND_INFERENCE_ENGINE;
}
void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
@ -653,8 +654,8 @@ struct ELUFunctor
#ifdef HAVE_INF_ENGINE
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
CV_Error(Error::StsNotImplemented, "ELU");
return InferenceEngine::CNNLayerPtr();
lp.type = "ELU";
return InferenceEngine::CNNLayerPtr(new InferenceEngine::CNNLayer(lp));
}
#endif // HAVE_INF_ENGINE

@ -91,8 +91,8 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_HALIDE && haveHalide() ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
backendId == DNN_BACKEND_HALIDE ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && (preferableTarget != DNN_TARGET_MYRIAD || type == CHANNEL_NRM);
}
#ifdef HAVE_OPENCL

@ -33,9 +33,7 @@ public:
interpolation = params.get<String>("interpolation");
CV_Assert(interpolation == "nearest" || interpolation == "bilinear");
bool alignCorners = params.get<bool>("align_corners", false);
if (alignCorners)
CV_Error(Error::StsNotImplemented, "Resize with align_corners=true is not implemented");
alignCorners = params.get<bool>("align_corners", false);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
@ -66,8 +64,15 @@ public:
outHeight = outputs[0].size[2];
outWidth = outputs[0].size[3];
}
scaleHeight = static_cast<float>(inputs[0]->size[2]) / outHeight;
scaleWidth = static_cast<float>(inputs[0]->size[3]) / outWidth;
if (alignCorners && outHeight > 1)
scaleHeight = static_cast<float>(inputs[0]->size[2] - 1) / (outHeight - 1);
else
scaleHeight = static_cast<float>(inputs[0]->size[2]) / outHeight;
if (alignCorners && outWidth > 1)
scaleWidth = static_cast<float>(inputs[0]->size[3] - 1) / (outWidth - 1);
else
scaleWidth = static_cast<float>(inputs[0]->size[3]) / outWidth;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
@ -166,6 +171,7 @@ protected:
int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight;
String interpolation;
float scaleWidth, scaleHeight;
bool alignCorners;
};

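The align_corners support boils down to the scale used to map an output pixel back to input coordinates: (in - 1) / (out - 1) when corners are aligned and the output has more than one pixel, in / out otherwise. A one-function sketch of the two scales:

    def resize_scale(in_size, out_size, align_corners):
        # in_x = out_x * scale maps an output index back to input coordinates.
        if align_corners and out_size > 1:
            return float(in_size - 1) / (out_size - 1)
        return float(in_size) / out_size

    print(resize_scale(10, 5, False), resize_scale(10, 5, True))  # 2.0 2.25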
@ -24,6 +24,7 @@
#define INF_ENGINE_RELEASE_2018R1 2018010000
#define INF_ENGINE_RELEASE_2018R2 2018020000
#define INF_ENGINE_RELEASE_2018R3 2018030000
#ifndef INF_ENGINE_RELEASE
#warning("IE version have not been provided via command-line. Using 2018R2 by default")
@ -31,6 +32,7 @@
#endif
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000))
#endif // HAVE_INF_ENGINE

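INF_ENGINE_RELEASE encodes the Inference Engine version as YYYYAABBCC, and the INF_ENGINE_VER_MAJOR_* macros compare only the YYYYAA release part by integer-dividing by 10000. A short arithmetic sketch of that comparison:

    INF_ENGINE_RELEASE_2018R2 = 2018020000
    INF_ENGINE_RELEASE_2018R3 = 2018030000

    def ver_major_ge(release, ver):
        # Mirrors INF_ENGINE_VER_MAJOR_GE: compare only the YYYYAA "major" part.
        return (release // 10000) >= (ver // 10000)

    print(2018020002 // 10000)                                  # 201802 (2018R2.0.2 is still a 2018R2 build)
    print(ver_major_ge(2018020002, INF_ENGINE_RELEASE_2018R3))  # False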
@ -737,11 +737,18 @@ void TFImporter::populateNet(Net dstNet)
int predictedLayout = predictOutputDataLayout(net, layer, data_layouts);
data_layouts[name] = predictedLayout;
if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative")
if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad")
{
// The first node of dilated convolution subgraph.
// Extract input node, dilation rate and paddings.
std::string input = layer.input(0);
StrIntVector next_layers;
if (type == "SpaceToBatchND" || type == "Pad")
{
next_layers = getNextLayers(net, name, "Conv2D");
if (next_layers.empty())
next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
}
if (type == "SpaceToBatchND")
{
// op: "SpaceToBatchND"
@ -762,17 +769,57 @@ void TFImporter::populateNet(Net dstNet)
layerParams.set("pad_h", paddings.at<float>(0));
layerParams.set("pad_w", paddings.at<float>(2));
StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
if (next_layers.empty())
{
next_layers = getNextLayers(net, name, "DepthwiseConv2dNative");
}
CV_Assert(next_layers.size() == 1);
layer = net.node(next_layers[0].second);
layers_to_ignore.insert(next_layers[0].first);
name = layer.name();
type = layer.op();
}
else if (type == "Pad")
{
Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
CV_Assert(paddings.type() == CV_32SC1);
if (paddings.total() == 8)
{
// Perhaps, we have NHWC padding dimensions order.
// N H W C
// 0 1 2 3 4 5 6 7
std::swap(paddings.at<int32_t>(2), paddings.at<int32_t>(6));
std::swap(paddings.at<int32_t>(3), paddings.at<int32_t>(7));
// N C W H
// 0 1 2 3 4 5 6 7
std::swap(paddings.at<int32_t>(4), paddings.at<int32_t>(6));
std::swap(paddings.at<int32_t>(5), paddings.at<int32_t>(7));
// N C H W
// 0 1 2 3 4 5 6 7
}
if (next_layers.empty() || paddings.total() != 8 ||
paddings.at<int32_t>(4) != paddings.at<int32_t>(5) ||
paddings.at<int32_t>(6) != paddings.at<int32_t>(7))
{
// Just a single padding layer.
layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
int id = dstNet.addLayer(name, "Padding", layerParams);
layer_id[name] = id;
connect(layer_id, dstNet, parsePin(input), id, 0);
continue;
}
else
{
// Merge with subsequent convolutional layer.
CV_Assert(next_layers.size() == 1);
layerParams.set("pad_h", paddings.at<int32_t>(4));
layerParams.set("pad_w", paddings.at<int32_t>(6));
layer = net.node(next_layers[0].second);
layers_to_ignore.insert(next_layers[0].first);
name = layer.name();
type = layer.op();
}
}
// For the object detection networks, TensorFlow Object Detection API
// predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
@ -784,7 +831,7 @@ void TFImporter::populateNet(Net dstNet)
layerParams.set("bias_term", false);
layerParams.blobs.resize(1);
StrIntVector next_layers = getNextLayers(net, name, "BiasAdd");
next_layers = getNextLayers(net, name, "BiasAdd");
if (next_layers.size() == 1) {
layerParams.set("bias_term", true);
layerParams.blobs.resize(2);
@ -1416,31 +1463,6 @@ void TFImporter::populateNet(Net dstNet)
}
}
}
else if (type == "Pad")
{
Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
CV_Assert(paddings.type() == CV_32SC1);
if (paddings.total() == 8)
{
// Perhaps, we have NHWC padding dimensions order.
// N H W C
// 0 1 2 3 4 5 6 7
std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
// N C W H
// 0 1 2 3 4 5 6 7
std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
// N C H W
// 0 1 2 3 4 5 6 7
}
layerParams.set("paddings", DictValue::arrayInt<int*>((int*)paddings.data, paddings.total()));
int id = dstNet.addLayer(name, "Padding", layerParams);
layer_id[name] = id;
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
else if (type == "FusedBatchNorm")
{
// op: "FusedBatchNorm"

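The relocated Pad handling reads TensorFlow's paddings tensor, which for a 4D NHWC input stores (before, after) pairs per axis in N, H, W, C order; the importer works in NCHW, so the pairs are swapped into N, C, H, W order before either fusing symmetric spatial padding into the following convolution (pad_h from index 4, pad_w from index 6) or emitting a standalone Padding layer. A small sketch of that reordering with example values:

    # (before, after) pairs per axis, flattened in TensorFlow's N H W C order.
    paddings = [0, 0, 1, 1, 2, 2, 0, 0]

    def nhwc_to_nchw_pairs(p):
        p = list(p)
        p[2], p[6] = p[6], p[2]   # pull the C pair forward ...
        p[3], p[7] = p[7], p[3]
        p[4], p[6] = p[6], p[4]   # ... then restore H before W
        p[5], p[7] = p[7], p[5]
        return p                  # now N C H W

    nchw = nhwc_to_nchw_pairs(paddings)
    print(nchw)                                  # [0, 0, 0, 0, 1, 1, 2, 2]
    print("pad_h:", nchw[4], "pad_w:", nchw[6])  # pad_h: 1 pad_w: 2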
@ -222,9 +222,12 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
TEST_P(DNNTestNetwork, OpenFace)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");
#endif
if (backend == DNN_BACKEND_HALIDE ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), "");
}
@ -253,12 +256,19 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
TEST_P(DNNTestNetwork, DenseNet_121)
{
if ((backend == DNN_BACKEND_HALIDE) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL_FP16 ||
target == DNN_TARGET_MYRIAD)))
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "caffe");
float l1 = 0.0, lInf = 0.0;
if (target == DNN_TARGET_OPENCL_FP16)
{
l1 = 9e-3; lInf = 5e-2;
}
else if (target == DNN_TARGET_MYRIAD)
{
l1 = 6e-2; lInf = 0.27;
}
processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf);
}
TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)

@ -374,14 +374,6 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
TEST_P(Test_Caffe_nets, Colorization)
{
checkBackend();
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
const float l1 = 4e-4;
const float lInf = 3e-3;
Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
Mat ref = blobFromNPY(_tf("colorization_out.npy"));
Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy"));
@ -398,11 +390,15 @@ TEST_P(Test_Caffe_nets, Colorization)
net.setInput(inp);
Mat out = net.forward();
// Reference output values are in range [-29.1, 69.5]
const double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.21 : 4e-4;
const double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5.3 : 3e-3;
normAssert(out, ref, "", l1, lInf);
}
TEST(Reproducibility_DenseNet_121, Accuracy)
TEST_P(Test_Caffe_nets, DenseNet_121)
{
checkBackend();
const string proto = findDataFile("dnn/DenseNet_121.prototxt", false);
const string model = findDataFile("dnn/DenseNet_121.caffemodel", false);
@ -411,12 +407,23 @@ TEST(Reproducibility_DenseNet_121, Accuracy)
Mat ref = blobFromNPY(_tf("densenet_121_output.npy"));
Net net = readNetFromCaffe(proto, model);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(inp);
Mat out = net.forward();
normAssert(out, ref);
// Reference is an array of 1000 values from a range [-6.16, 7.9]
float l1 = default_l1, lInf = default_lInf;
if (target == DNN_TARGET_OPENCL_FP16)
{
l1 = 0.017; lInf = 0.067;
}
else if (target == DNN_TARGET_MYRIAD)
{
l1 = 0.097; lInf = 0.52;
}
normAssert(out, ref, "", l1, lInf);
}
TEST(Test_Caffe, multiple_inputs)

@ -177,7 +177,8 @@ TEST_P(DNNTestOpenVINO, models)
Target target = (dnn::Target)(int)get<0>(GetParam());
std::string modelName = get<1>(GetParam());
if (modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16)
if ((modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16) ||
(modelName == "vehicle-license-plate-detection-barrier-0106"))
throw SkipTestException("");
std::string precision = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "FP16" : "FP32";

@ -127,15 +127,9 @@ TEST_P(Test_Caffe_layers, Softmax)
testLayerUsingCaffeModels("layer_softmax");
}
TEST_P(Test_Caffe_layers, LRN_spatial)
TEST_P(Test_Caffe_layers, LRN)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
testLayerUsingCaffeModels("layer_lrn_spatial");
}
TEST_P(Test_Caffe_layers, LRN_channels)
{
testLayerUsingCaffeModels("layer_lrn_channels");
}

@ -399,8 +399,10 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8)
TEST_P(Test_TensorFlow_nets, EAST_text_detection)
{
checkBackend();
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");
#endif
std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false);
std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false);
@ -425,8 +427,25 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection)
Mat scores = outs[0];
Mat geometry = outs[1];
normAssert(scores, blobFromNPY(refScoresPath), "scores");
normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3);
// Scores are in range [0, 1]. Geometry values are in range [-0.23, 290]
double l1_scores = default_l1, lInf_scores = default_lInf;
double l1_geometry = default_l1, lInf_geometry = default_lInf;
if (target == DNN_TARGET_OPENCL_FP16)
{
lInf_scores = 0.11;
l1_geometry = 0.28; lInf_geometry = 5.94;
}
else if (target == DNN_TARGET_MYRIAD)
{
lInf_scores = 0.214;
l1_geometry = 0.47; lInf_geometry = 15.34;
}
else
{
l1_geometry = 1e-4, lInf_geometry = 3e-3;
}
normAssert(scores, blobFromNPY(refScoresPath), "scores", l1_scores, lInf_scores);
normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", l1_geometry, lInf_geometry);
}
INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, dnnBackendsAndTargets());
@ -537,4 +556,56 @@ TEST(Test_TensorFlow, two_inputs)
normAssert(out, firstInput + secondInput);
}
TEST(Test_TensorFlow, Mask_RCNN)
{
std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt", false);
std::string model = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pb", false);
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/street.png", false));
Mat refDetections = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_out.npy"));
Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy"));
Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setInput(blob);
// Mask-RCNN predicts bounding boxes and segmentation masks.
std::vector<String> outNames(2);
outNames[0] = "detection_out_final";
outNames[1] = "detection_masks";
std::vector<Mat> outs;
net.forward(outs, outNames);
Mat outDetections = outs[0];
Mat outMasks = outs[1];
normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5);
// Output size of masks is NxCxHxW where
// N - number of detected boxes
// C - number of classes (excluding background)
// HxW - segmentation shape
const int numDetections = outDetections.size[2];
int masksSize[] = {1, numDetections, outMasks.size[2], outMasks.size[3]};
Mat masks(4, &masksSize[0], CV_32F);
std::vector<cv::Range> srcRanges(4, cv::Range::all());
std::vector<cv::Range> dstRanges(4, cv::Range::all());
outDetections = outDetections.reshape(1, outDetections.total() / 7);
for (int i = 0; i < numDetections; ++i)
{
// Get a class id for this bounding box and copy mask only for that class.
int classId = static_cast<int>(outDetections.at<float>(i, 1));
srcRanges[0] = dstRanges[1] = cv::Range(i, i + 1);
srcRanges[1] = cv::Range(classId, classId + 1);
outMasks(srcRanges).copyTo(masks(dstRanges));
}
cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()};
normAssert(masks, refMasks(&topRefMasks[0]));
}
}

@ -242,15 +242,23 @@ TEST_P(Test_Torch_layers, net_residual)
runTorchNet("net_residual", "", false, true);
}
typedef testing::TestWithParam<Target> Test_Torch_nets;
class Test_Torch_nets : public DNNTestLayer {};
TEST_P(Test_Torch_nets, OpenFace_accuracy)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("Test is enabled starts from OpenVINO 2018R3");
#endif
checkBackend();
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("");
const string model = findDataFile("dnn/openface_nn4.small2.v1.t7", false);
Net net = readNetFromTorch(model);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(GetParam());
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat sample = imread(findDataFile("cv/shared/lena.png", false));
Mat sampleF32(sample.size(), CV_32FC3);
@ -264,11 +272,16 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy)
Mat out = net.forward();
Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true);
normAssert(out, outRef);
normAssert(out, outRef, "", default_l1, default_lInf);
}
TEST_P(Test_Torch_nets, ENet_accuracy)
{
checkBackend();
if (backend == DNN_BACKEND_INFERENCE_ENGINE ||
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
Net net;
{
const string model = findDataFile("dnn/Enet-model-best.net", false);
@ -276,8 +289,8 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
ASSERT_TRUE(!net.empty());
}
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(GetParam());
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat sample = imread(_tf("street.png", false));
Mat inputBlob = blobFromImage(sample, 1./255);
@ -314,6 +327,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
// -model models/instance_norm/feathers.t7
TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
{
checkBackend();
std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
"dnn/fast_neural_style_instance_norm_feathers.t7"};
std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
@ -323,8 +337,8 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
const string model = findDataFile(models[i], false);
Net net = readNetFromTorch(model);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(GetParam());
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
@ -341,12 +355,20 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
Mat ref = imread(findDataFile(targets[i]));
Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
normAssert(out, refBlob, "", 0.5, 1.1);
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
{
double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total();
if (target == DNN_TARGET_MYRIAD)
EXPECT_LE(normL1, 4.0f);
else
EXPECT_LE(normL1, 0.6f);
}
else
normAssert(out, refBlob, "", 0.5, 1.1);
}
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets());
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, dnnBackendsAndTargets());
// Test a custom layer
// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest

@ -199,6 +199,7 @@ typedef std::vector<Vec6f> vector_Vec6f;
typedef std::vector<Vec4i> vector_Vec4i;
typedef std::vector<Rect> vector_Rect;
typedef std::vector<Rect2d> vector_Rect2d;
typedef std::vector<RotatedRect> vector_RotatedRect;
typedef std::vector<KeyPoint> vector_KeyPoint;
typedef std::vector<Mat> vector_Mat;
typedef std::vector<std::vector<Mat> > vector_vector_Mat;
@ -1643,6 +1644,18 @@ template<> struct pyopencvVecConverter<String>
}
};
template<> struct pyopencvVecConverter<RotatedRect>
{
static bool to(PyObject* obj, std::vector<RotatedRect>& value, const ArgInfo info)
{
return pyopencv_to_generic_vec(obj, value, info);
}
static PyObject* from(const std::vector<RotatedRect>& value)
{
return pyopencv_from_generic_vec(value);
}
};
template<>
bool pyopencv_to(PyObject *obj, TermCriteria& dst, const char *name)
{

@ -165,8 +165,8 @@ static Mat _localAffineEstimate(const std::vector<Point2f>& shape1, const std::v
}
else
{
therow.at<float>(0,0)=-shape1[contPt].y;
therow.at<float>(0,1)=shape1[contPt].x;
therow.at<float>(0,0)=shape1[contPt].y;
therow.at<float>(0,1)=-shape1[contPt].x;
therow.at<float>(0,3)=1;
therow.row(0).copyTo(matM.row(ii));
matP.at<float>(ii,0) = shape2[contPt].y;

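The sign fix in _localAffineEstimate concerns the rows built per correspondence in the non-fullAffine (similarity) branch. Assuming the unknowns are ordered (a, b, tx, ty), the companion row for x' is [x, y, 1, 0] (not shown in this hunk), and the model is x' = a*x + b*y + tx, y' = -b*x + a*y + ty, the corrected row [y, -x, 0, 1] makes the second equation consistent; the old signs effectively mixed a reflection into the estimate. A hedged least-squares sketch under those assumptions:

    import numpy as np

    def rows_for_point(x, y):
        return np.array([[x,  y, 1, 0],    # equation for x'
                         [y, -x, 0, 1]])   # equation for y' (corrected sign pattern)

    src = [(0, 0), (1, 0), (0, 1)]
    a, b, tx, ty = 0.8, 0.6, 2.0, -1.0
    dst = [(a*x + b*y + tx, -b*x + a*y + ty) for x, y in src]

    M = np.vstack([rows_for_point(x, y) for x, y in src])
    p = np.array(dst).ravel()
    print(np.linalg.lstsq(M, p, rcond=None)[0])  # recovers [0.8, 0.6, 2.0, -1.0]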
@ -921,6 +921,14 @@ inline static void recordPropertyVerbose(const std::string & property,
}
}
inline static void recordPropertyVerbose(const std::string& property, const std::string& msg,
const char* value, const char* build_value = NULL)
{
return recordPropertyVerbose(property, msg,
value ? std::string(value) : std::string(),
build_value ? std::string(build_value) : std::string());
}
#ifdef _DEBUG
#define CV_TEST_BUILD_CONFIG "Debug"
#else

@ -107,7 +107,7 @@ class XINECapture : public IVideoCapture
bool open(const char *filename)
{
CV_Assert(!xine, !stream, !vo_port);
CV_Assert_N(!xine, !stream, !vo_port);
char configfile[2048] = {0};
xine = xine_new();
@ -207,7 +207,7 @@ class XINECapture : public IVideoCapture
double getProperty(int property_id) const CV_OVERRIDE
{
CV_Assert(xine, vo_port, stream);
CV_Assert_N(xine, vo_port, stream);
int pos_t, pos_l, length;
bool res = (bool)xine_get_pos_length(stream, &pos_l, &pos_t, &length);
@ -240,7 +240,7 @@ class XINECapture : public IVideoCapture
protected:
bool oldSeekFrame(int f)
{
CV_Assert(xine, vo_port, stream);
CV_Assert_N(xine, vo_port, stream);
// no need to seek if we are already there...
if (f == frame_number)
{
@ -290,7 +290,7 @@ protected:
bool seekFrame(int f)
{
CV_Assert(xine, vo_port, stream);
CV_Assert_N(xine, vo_port, stream);
if (seekable)
{
int new_time = (int)((f + 1) * (float)frame_duration);
@ -309,7 +309,7 @@ protected:
bool seekTime(int t)
{
CV_Assert(xine, vo_port, stream);
CV_Assert_N(xine, vo_port, stream);
if (seekable)
{
if (xine_play(stream, 0, t))
@ -328,7 +328,7 @@ protected:
bool seekRatio(double ratio)
{
CV_Assert(xine, vo_port, stream);
CV_Assert_N(xine, vo_port, stream);
if (ratio > 1 || ratio < 0)
return false;
if (seekable)

@ -301,6 +301,7 @@ void cv::viz::Widget3D::applyTransform(const Affine3d &transform)
vtkSmartPointer<vtkPolyDataMapper> mapper = vtkPolyDataMapper::SafeDownCast(actor->GetMapper());
CV_Assert("Widget doesn't have a polydata mapper" && mapper);
mapper->Update(); // #10945
VtkUtils::SetInputData(mapper, VtkUtils::TransformPolydata(mapper->GetInput(), transform));
mapper->Update();
}

@ -106,9 +106,13 @@ class ABI:
self.cmake_vars = dict(
ANDROID_STL="gnustl_static",
ANDROID_ABI=self.name,
ANDROID_TOOLCHAIN_NAME=toolchain,
ANDROID_PLATFORM_ID=platform_id,
)
if toolchain is not None:
self.cmake_vars['ANDROID_TOOLCHAIN_NAME'] = toolchain
else:
self.cmake_vars['ANDROID_TOOLCHAIN'] = 'clang'
self.cmake_vars['ANDROID_STL'] = 'c++_static'
if ndk_api_level:
self.cmake_vars['ANDROID_NATIVE_API_LEVEL'] = ndk_api_level
self.cmake_vars.update(cmake_vars)
@ -206,7 +210,7 @@ class Builder:
# Add extra data
apkxmldest = check_dir(os.path.join(apkdest, "res", "xml"), create=True)
apklibdest = check_dir(os.path.join(apkdest, "libs", abi.name), create=True)
for ver, d in self.extra_packs + [("3.4.2", os.path.join(self.libdest, "lib"))]:
for ver, d in self.extra_packs + [("3.4.3", os.path.join(self.libdest, "lib"))]:
r = ET.Element("library", attrib={"version": ver})
log.info("Adding libraries from %s", d)

@ -0,0 +1,6 @@
ABIs = [
ABI("2", "armeabi-v7a", None, cmake_vars=dict(ANDROID_ABI='armeabi-v7a with NEON')),
ABI("3", "arm64-v8a", None),
ABI("5", "x86_64", None),
ABI("4", "x86", None),
]

@ -1,8 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="org.opencv.engine"
android:versionCode="342@ANDROID_PLATFORM_ID@"
android:versionName="3.42">
android:versionCode="343@ANDROID_PLATFORM_ID@"
android:versionName="3.43">
<uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" android:targetSdkVersion="22"/>
<uses-feature android:name="android.hardware.touchscreen" android:required="false"/>

@ -137,7 +137,7 @@ public class OpenCVEngineService extends Service {
@Override
public int getEngineVersion() throws RemoteException {
int version = 3420;
int version = 3430;
try {
version = getPackageManager().getPackageInfo(getPackageName(), 0).versionCode;
} catch (NameNotFoundException e) {

@ -12,7 +12,7 @@ manually using adb tool:
adb install <path-to-OpenCV-sdk>/apk/OpenCV_<version>_Manager_<app_version>_<platform>.apk
Example: OpenCV_3.4.2-dev_Manager_3.42_armeabi-v7a.apk
Example: OpenCV_3.4.3-dev_Manager_3.43_armeabi-v7a.apk
Use the list of platforms below to determine proper OpenCV Manager package for your device:

@ -4,7 +4,7 @@
<parent>
<groupId>org.opencv</groupId>
<artifactId>opencv-parent</artifactId>
<version>3.4.2</version>
<version>3.4.3</version>
</parent>
<groupId>org.opencv</groupId>
<artifactId>opencv-it</artifactId>

@ -4,7 +4,7 @@
<parent>
<groupId>org.opencv</groupId>
<artifactId>opencv-parent</artifactId>
<version>3.4.2</version>
<version>3.4.3</version>
</parent>
<groupId>org.opencv</groupId>
<artifactId>opencv</artifactId>

@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.opencv</groupId>
<artifactId>opencv-parent</artifactId>
<version>3.4.2</version>
<version>3.4.3</version>
<packaging>pom</packaging>
<name>OpenCV Parent POM</name>
<licenses>

@ -0,0 +1,143 @@
import cv2 as cv
import argparse
import numpy as np
parser = argparse.ArgumentParser(description=
'Use this script to run Mask-RCNN object detection and semantic '
'segmentation network from TensorFlow Object Detection API.')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--model', required=True, help='Path to a .pb file with weights.')
parser.add_argument('--config', required=True, help='Path to a .pbtxt file contains network configuration.')
parser.add_argument('--classes', help='Optional path to a text file with names of classes.')
parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. '
'An every color is represented with three values from 0 to 255 in BGR channels order.')
parser.add_argument('--width', type=int, default=800,
help='Preprocess input image by resizing to a specific width.')
parser.add_argument('--height', type=int, default=800,
help='Preprocess input image by resizing to a specific height.')
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
args = parser.parse_args()
np.random.seed(324)
# Load names of classes
classes = None
if args.classes:
with open(args.classes, 'rt') as f:
classes = f.read().rstrip('\n').split('\n')
# Load colors
colors = None
if args.colors:
with open(args.colors, 'rt') as f:
colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')]
legend = None
def showLegend(classes):
global legend
if not classes is None and legend is None:
blockHeight = 30
assert(len(classes) == len(colors))
legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8)
for i in range(len(classes)):
block = legend[i * blockHeight:(i + 1) * blockHeight]
block[:,:] = colors[i]
cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.namedWindow('Legend', cv.WINDOW_NORMAL)
cv.imshow('Legend', legend)
classes = None
def drawBox(frame, classId, conf, left, top, right, bottom):
# Draw a bounding box.
cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))
label = '%.2f' % conf
# Print a label of class.
if classes:
assert(classId < len(classes))
label = '%s: %s' % (classes[classId], label)
labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
top = max(top, labelSize[1])
cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
# Load a network
net = cv.dnn.readNet(args.model, args.config)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
winName = 'Mask-RCNN in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)
legend = None
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
frameH = frame.shape[0]
frameW = frame.shape[1]
# Create a 4D blob from a frame.
blob = cv.dnn.blobFromImage(frame, size=(args.width, args.height), swapRB=True, crop=False)
# Run a model
net.setInput(blob)
boxes, masks = net.forward(['detection_out_final', 'detection_masks'])
numClasses = masks.shape[1]
numDetections = boxes.shape[2]
# Draw segmentation
if not colors:
# Generate colors
colors = [np.array([0, 0, 0], np.uint8)]
for i in range(1, numClasses + 1):
colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2)
del colors[0]
boxesToDraw = []
for i in range(numDetections):
box = boxes[0, 0, i]
mask = masks[i]
score = box[2]
if score > args.thr:
classId = int(box[1])
left = int(frameW * box[3])
top = int(frameH * box[4])
right = int(frameW * box[5])
bottom = int(frameH * box[6])
left = max(0, min(left, frameW - 1))
top = max(0, min(top, frameH - 1))
right = max(0, min(right, frameW - 1))
bottom = max(0, min(bottom, frameH - 1))
boxesToDraw.append([frame, classId, score, left, top, right, bottom])
classMask = mask[classId]
classMask = cv.resize(classMask, (right - left + 1, bottom - top + 1))
mask = (classMask > 0.5)
roi = frame[top:bottom+1, left:right+1][mask]
frame[top:bottom+1, left:right+1][mask] = (0.7 * colors[classId] + 0.3 * roi).astype(np.uint8)
for box in boxesToDraw:
drawBox(*box)
# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
showLegend(classes)
cv.imshow(winName, frame)

@ -23,3 +23,98 @@ def addConstNode(name, values, graph_def):
node.op = 'Const'
text_format.Merge(tensorMsg(values), node.attr["value"])
graph_def.node.extend([node])
def addSlice(inp, out, begins, sizes, graph_def):
beginsNode = NodeDef()
beginsNode.name = out + '/begins'
beginsNode.op = 'Const'
text_format.Merge(tensorMsg(begins), beginsNode.attr["value"])
graph_def.node.extend([beginsNode])
sizesNode = NodeDef()
sizesNode.name = out + '/sizes'
sizesNode.op = 'Const'
text_format.Merge(tensorMsg(sizes), sizesNode.attr["value"])
graph_def.node.extend([sizesNode])
sliced = NodeDef()
sliced.name = out
sliced.op = 'Slice'
sliced.input.append(inp)
sliced.input.append(beginsNode.name)
sliced.input.append(sizesNode.name)
graph_def.node.extend([sliced])
def addReshape(inp, out, shape, graph_def):
shapeNode = NodeDef()
shapeNode.name = out + '/shape'
shapeNode.op = 'Const'
text_format.Merge(tensorMsg(shape), shapeNode.attr["value"])
graph_def.node.extend([shapeNode])
reshape = NodeDef()
reshape.name = out
reshape.op = 'Reshape'
reshape.input.append(inp)
reshape.input.append(shapeNode.name)
graph_def.node.extend([reshape])
def addSoftMax(inp, out, graph_def):
softmax = NodeDef()
softmax.name = out
softmax.op = 'Softmax'
text_format.Merge('i: -1', softmax.attr['axis'])
softmax.input.append(inp)
graph_def.node.extend([softmax])
def addFlatten(inp, out, graph_def):
flatten = NodeDef()
flatten.name = out
flatten.op = 'Flatten'
flatten.input.append(inp)
graph_def.node.extend([flatten])
# Removes Identity nodes
def removeIdentity(graph_def):
identities = {}
for node in graph_def.node:
if node.op == 'Identity':
identities[node.name] = node.input[0]
graph_def.node.remove(node)
for node in graph_def.node:
for i in range(len(node.input)):
if node.input[i] in identities:
node.input[i] = identities[node.input[i]]
def removeUnusedNodesAndAttrs(to_remove, graph_def):
unusedAttrs = ['T', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu',
'Index', 'Tperm', 'is_training', 'Tpaddings']
removedNodes = []
for i in reversed(range(len(graph_def.node))):
op = graph_def.node[i].op
name = graph_def.node[i].name
if op == 'Const' or to_remove(name, op):
if op != 'Const':
removedNodes.append(name)
del graph_def.node[i]
else:
for attr in unusedAttrs:
if attr in graph_def.node[i].attr:
del graph_def.node[i].attr[attr]
# Remove references to removed nodes except Const nodes.
for node in graph_def.node:
for i in reversed(range(len(node.input))):
if node.input[i] in removedNodes:
del node.input[i]

@ -6,7 +6,7 @@ from tensorflow.core.framework.node_def_pb2 import NodeDef
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
from tf_text_graph_common import tensorMsg, addConstNode
from tf_text_graph_common import *
parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
'SSD model from TensorFlow Object Detection API. '
@ -37,50 +37,17 @@ scopesToIgnore = ('FirstStageFeatureExtractor/Assert',
'FirstStageFeatureExtractor/GreaterEqual',
'FirstStageFeatureExtractor/LogicalAnd')
unusedAttrs = ['T', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu',
'Index', 'Tperm', 'is_training', 'Tpaddings']
# Read the graph.
with tf.gfile.FastGFile(args.input, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
# Removes Identity nodes
def removeIdentity():
identities = {}
for node in graph_def.node:
if node.op == 'Identity':
identities[node.name] = node.input[0]
graph_def.node.remove(node)
for node in graph_def.node:
for i in range(len(node.input)):
if node.input[i] in identities:
node.input[i] = identities[node.input[i]]
removeIdentity()
removedNodes = []
for i in reversed(range(len(graph_def.node))):
op = graph_def.node[i].op
name = graph_def.node[i].name
removeIdentity(graph_def)
if op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep):
if op != 'Const':
removedNodes.append(name)
def to_remove(name, op):
return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)
del graph_def.node[i]
else:
for attr in unusedAttrs:
if attr in graph_def.node[i].attr:
del graph_def.node[i].attr[attr]
# Remove references to removed nodes except Const nodes.
for node in graph_def.node:
for i in reversed(range(len(node.input))):
if node.input[i] in removedNodes:
del node.input[i]
removeUnusedNodesAndAttrs(to_remove, graph_def)
# Connect input node to the first layer
@ -95,68 +62,18 @@ while True:
if node.op == 'CropAndResize':
break
def addSlice(inp, out, begins, sizes):
beginsNode = NodeDef()
beginsNode.name = out + '/begins'
beginsNode.op = 'Const'
text_format.Merge(tensorMsg(begins), beginsNode.attr["value"])
graph_def.node.extend([beginsNode])
sizesNode = NodeDef()
sizesNode.name = out + '/sizes'
sizesNode.op = 'Const'
text_format.Merge(tensorMsg(sizes), sizesNode.attr["value"])
graph_def.node.extend([sizesNode])
sliced = NodeDef()
sliced.name = out
sliced.op = 'Slice'
sliced.input.append(inp)
sliced.input.append(beginsNode.name)
sliced.input.append(sizesNode.name)
graph_def.node.extend([sliced])
def addReshape(inp, out, shape):
shapeNode = NodeDef()
shapeNode.name = out + '/shape'
shapeNode.op = 'Const'
text_format.Merge(tensorMsg(shape), shapeNode.attr["value"])
graph_def.node.extend([shapeNode])
reshape = NodeDef()
reshape.name = out
reshape.op = 'Reshape'
reshape.input.append(inp)
reshape.input.append(shapeNode.name)
graph_def.node.extend([reshape])
def addSoftMax(inp, out):
softmax = NodeDef()
softmax.name = out
softmax.op = 'Softmax'
text_format.Merge('i: -1', softmax.attr['axis'])
softmax.input.append(inp)
graph_def.node.extend([softmax])
def addFlatten(inp, out):
flatten = NodeDef()
flatten.name = out
flatten.op = 'Flatten'
flatten.input.append(inp)
graph_def.node.extend([flatten])
addReshape('FirstStageBoxPredictor/ClassPredictor/BiasAdd',
'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2])
'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2], graph_def)
addSoftMax('FirstStageBoxPredictor/ClassPredictor/reshape_1',
'FirstStageBoxPredictor/ClassPredictor/softmax') # Compare with Reshape_4
'FirstStageBoxPredictor/ClassPredictor/softmax', graph_def) # Compare with Reshape_4
addFlatten('FirstStageBoxPredictor/ClassPredictor/softmax',
'FirstStageBoxPredictor/ClassPredictor/softmax/flatten')
'FirstStageBoxPredictor/ClassPredictor/softmax/flatten', graph_def)
# Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd
addFlatten('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd',
'FirstStageBoxPredictor/BoxEncodingPredictor/flatten')
'FirstStageBoxPredictor/BoxEncodingPredictor/flatten', graph_def)
proposals = NodeDef()
proposals.name = 'proposals' # Compare with ClipToWindow/Gather/Gather (NOTE: normalized)
@ -218,14 +135,14 @@ graph_def.node.extend([clipByValueNode])
for node in reversed(topNodes):
graph_def.node.extend([node])
addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax')
addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax', graph_def)
addSlice('SecondStageBoxPredictor/Reshape_1/softmax',
'SecondStageBoxPredictor/Reshape_1/slice',
[0, 0, 1], [-1, -1, -1])
[0, 0, 1], [-1, -1, -1], graph_def)
addReshape('SecondStageBoxPredictor/Reshape_1/slice',
'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1])
'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)
# Replace the Flatten subgraph with a single node.
for i in reversed(range(len(graph_def.node))):
@ -255,7 +172,7 @@ for node in graph_def.node:
################################################################################
### Postprocessing
################################################################################
addSlice('detection_out/clip_by_value', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4])
addSlice('detection_out/clip_by_value', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4], graph_def)
variance = NodeDef()
variance.name = 'proposals/variance'
@ -271,8 +188,8 @@ varianceEncoder.input.append(variance.name)
text_format.Merge('i: 2', varianceEncoder.attr["axis"])
graph_def.node.extend([varianceEncoder])
addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1])
addFlatten('variance_encoded', 'variance_encoded/flatten')
addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1], graph_def)
addFlatten('variance_encoded', 'variance_encoded/flatten', graph_def)
detectionOut = NodeDef()
detectionOut.name = 'detection_out_final'

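The Slice node built in the postprocessing step above takes explicit begin and size tensors. As a sanity check of what addSlice(..., [0, 0, 0, 3], [-1, -1, -1, 4], ...) extracts, here is a minimal NumPy sketch of the equivalent indexing (the blob shape is an assumption for illustration; DetectionOutput rows carry 7 values, of which the last 4 are box coordinates):

```python
import numpy as np

# Dummy 4-D blob standing in for 'detection_out/clip_by_value':
# (batch, 1, num_detections, 7).
x = np.random.rand(1, 1, 10, 7).astype(np.float32)

# Slice with begins=[0, 0, 0, 3] and sizes=[-1, -1, -1, 4] keeps everything
# along the first three axes and elements 3..6 of the last axis,
# i.e. the four box coordinates of each detection row.
sliced = x[:, :, :, 3:3 + 4]
print(sliced.shape)  # (1, 1, 10, 4)
```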
@ -0,0 +1,230 @@
import argparse
import numpy as np
import tensorflow as tf
from tensorflow.core.framework.node_def_pb2 import NodeDef
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
from tf_text_graph_common import *
parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
'Mask-RCNN model from TensorFlow Object Detection API. '
'Then pass it with .pb file to cv::dnn::readNetFromTensorflow function.')
parser.add_argument('--input', required=True, help='Path to frozen TensorFlow graph.')
parser.add_argument('--output', required=True, help='Path to output text graph.')
parser.add_argument('--num_classes', default=90, type=int, help='Number of trained classes.')
parser.add_argument('--scales', default=[0.25, 0.5, 1.0, 2.0], type=float, nargs='+',
help='Hyper-parameter of grid_anchor_generator from a config file.')
parser.add_argument('--aspect_ratios', default=[0.5, 1.0, 2.0], type=float, nargs='+',
help='Hyper-parameter of grid_anchor_generator from a config file.')
parser.add_argument('--features_stride', default=16, type=float,
help='Hyper-parameter from a config file.')
args = parser.parse_args()
scopesToKeep = ('FirstStageFeatureExtractor', 'Conv',
'FirstStageBoxPredictor/BoxEncodingPredictor',
'FirstStageBoxPredictor/ClassPredictor',
'CropAndResize',
'MaxPool2D',
'SecondStageFeatureExtractor',
'SecondStageBoxPredictor',
'Preprocessor/sub',
'Preprocessor/mul',
'image_tensor')
scopesToIgnore = ('FirstStageFeatureExtractor/Assert',
'FirstStageFeatureExtractor/Shape',
'FirstStageFeatureExtractor/strided_slice',
'FirstStageFeatureExtractor/GreaterEqual',
'FirstStageFeatureExtractor/LogicalAnd')
# Read the graph.
with tf.gfile.FastGFile(args.input, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
removeIdentity(graph_def)
def to_remove(name, op):
return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)
removeUnusedNodesAndAttrs(to_remove, graph_def)
# Connect input node to the first layer
assert(graph_def.node[0].op == 'Placeholder')
graph_def.node[1].input.insert(0, graph_def.node[0].name)
# Temporarily remove top nodes.
topNodes = []
numCropAndResize = 0
while True:
node = graph_def.node.pop()
topNodes.append(node)
if node.op == 'CropAndResize':
numCropAndResize += 1
if numCropAndResize == 2:
break
addReshape('FirstStageBoxPredictor/ClassPredictor/BiasAdd',
'FirstStageBoxPredictor/ClassPredictor/reshape_1', [0, -1, 2], graph_def)
addSoftMax('FirstStageBoxPredictor/ClassPredictor/reshape_1',
'FirstStageBoxPredictor/ClassPredictor/softmax', graph_def) # Compare with Reshape_4
addFlatten('FirstStageBoxPredictor/ClassPredictor/softmax',
'FirstStageBoxPredictor/ClassPredictor/softmax/flatten', graph_def)
# Compare with FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd
addFlatten('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd',
'FirstStageBoxPredictor/BoxEncodingPredictor/flatten', graph_def)
proposals = NodeDef()
proposals.name = 'proposals' # Compare with ClipToWindow/Gather/Gather (NOTE: normalized)
proposals.op = 'PriorBox'
proposals.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/BiasAdd')
proposals.input.append(graph_def.node[0].name) # image_tensor
text_format.Merge('b: false', proposals.attr["flip"])
text_format.Merge('b: true', proposals.attr["clip"])
text_format.Merge('f: %f' % args.features_stride, proposals.attr["step"])
text_format.Merge('f: 0.0', proposals.attr["offset"])
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), proposals.attr["variance"])
widths = []
heights = []
for a in args.aspect_ratios:
for s in args.scales:
ar = np.sqrt(a)
heights.append((args.features_stride**2) * s / ar)
widths.append((args.features_stride**2) * s * ar)
text_format.Merge(tensorMsg(widths), proposals.attr["width"])
text_format.Merge(tensorMsg(heights), proposals.attr["height"])
graph_def.node.extend([proposals])
# Compare with Reshape_5
detectionOut = NodeDef()
detectionOut.name = 'detection_out'
detectionOut.op = 'DetectionOutput'
detectionOut.input.append('FirstStageBoxPredictor/BoxEncodingPredictor/flatten')
detectionOut.input.append('FirstStageBoxPredictor/ClassPredictor/softmax/flatten')
detectionOut.input.append('proposals')
text_format.Merge('i: 2', detectionOut.attr['num_classes'])
text_format.Merge('b: true', detectionOut.attr['share_location'])
text_format.Merge('i: 0', detectionOut.attr['background_label_id'])
text_format.Merge('f: 0.7', detectionOut.attr['nms_threshold'])
text_format.Merge('i: 6000', detectionOut.attr['top_k'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 100', detectionOut.attr['keep_top_k'])
text_format.Merge('b: true', detectionOut.attr['clip'])
graph_def.node.extend([detectionOut])
# Save as text.
for node in reversed(topNodes):
if node.op != 'CropAndResize':
graph_def.node.extend([node])
topNodes.pop()
else:
if numCropAndResize == 1:
break
else:
graph_def.node.extend([node])
topNodes.pop()
numCropAndResize -= 1
addSoftMax('SecondStageBoxPredictor/Reshape_1', 'SecondStageBoxPredictor/Reshape_1/softmax', graph_def)
addSlice('SecondStageBoxPredictor/Reshape_1/softmax',
'SecondStageBoxPredictor/Reshape_1/slice',
[0, 0, 1], [-1, -1, -1], graph_def)
addReshape('SecondStageBoxPredictor/Reshape_1/slice',
'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)
# Replace the Flatten subgraph with a single node.
for i in reversed(range(len(graph_def.node))):
if graph_def.node[i].op == 'CropAndResize':
graph_def.node[i].input.insert(1, 'detection_out')
if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape':
addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def)
graph_def.node[i].input.pop()
graph_def.node[i].input.append('SecondStageBoxPredictor/Reshape/shape2')
if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape',
'SecondStageBoxPredictor/Flatten/flatten/strided_slice',
'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape']:
del graph_def.node[i]
for node in graph_def.node:
if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape':
node.op = 'Flatten'
node.input.pop()
if node.name in ['FirstStageBoxPredictor/BoxEncodingPredictor/Conv2D',
'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']:
text_format.Merge('b: true', node.attr["loc_pred_transposed"])
################################################################################
### Postprocessing
################################################################################
addSlice('detection_out', 'detection_out/slice', [0, 0, 0, 3], [-1, -1, -1, 4], graph_def)
variance = NodeDef()
variance.name = 'proposals/variance'
variance.op = 'Const'
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), variance.attr["value"])
graph_def.node.extend([variance])
varianceEncoder = NodeDef()
varianceEncoder.name = 'variance_encoded'
varianceEncoder.op = 'Mul'
varianceEncoder.input.append('SecondStageBoxPredictor/Reshape')
varianceEncoder.input.append(variance.name)
text_format.Merge('i: 2', varianceEncoder.attr["axis"])
graph_def.node.extend([varianceEncoder])
addReshape('detection_out/slice', 'detection_out/slice/reshape', [1, 1, -1], graph_def)
addFlatten('variance_encoded', 'variance_encoded/flatten', graph_def)
detectionOut = NodeDef()
detectionOut.name = 'detection_out_final'
detectionOut.op = 'DetectionOutput'
detectionOut.input.append('variance_encoded/flatten')
detectionOut.input.append('SecondStageBoxPredictor/Reshape_1/Reshape')
detectionOut.input.append('detection_out/slice/reshape')
text_format.Merge('i: %d' % args.num_classes, detectionOut.attr['num_classes'])
text_format.Merge('b: false', detectionOut.attr['share_location'])
text_format.Merge('i: %d' % (args.num_classes + 1), detectionOut.attr['background_label_id'])
text_format.Merge('f: 0.6', detectionOut.attr['nms_threshold'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 100', detectionOut.attr['keep_top_k'])
text_format.Merge('b: true', detectionOut.attr['clip'])
text_format.Merge('b: true', detectionOut.attr['variance_encoded_in_target'])
text_format.Merge('f: 0.3', detectionOut.attr['confidence_threshold'])
text_format.Merge('b: false', detectionOut.attr['group_by_classes'])
graph_def.node.extend([detectionOut])
for node in reversed(topNodes):
graph_def.node.extend([node])
for i in reversed(range(len(graph_def.node))):
if graph_def.node[i].op == 'CropAndResize':
graph_def.node[i].input.insert(1, 'detection_out_final')
break
graph_def.node[-1].name = 'detection_masks'
graph_def.node[-1].op = 'Sigmoid'
graph_def.node[-1].input.pop()
tf.train.write_graph(graph_def, "", args.output, as_text=True)
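The PriorBox widths and heights generated above follow the grid anchor generator convention: the base size is features_stride squared, scaled by each scale and stretched by the square root of each aspect ratio. A small worked check of that loop with the default hyper-parameters (values rounded; purely illustrative):

```python
import numpy as np

features_stride = 16.0
scales = [0.25, 0.5, 1.0, 2.0]
aspect_ratios = [0.5, 1.0, 2.0]

widths, heights = [], []
for a in aspect_ratios:
    for s in scales:
        ar = np.sqrt(a)
        heights.append((features_stride ** 2) * s / ar)
        widths.append((features_stride ** 2) * s * ar)

# For a = 0.5, s = 0.25: base = 256, ar ~ 0.707,
# so height ~ 256 * 0.25 / 0.707 ~ 90.5 and width ~ 256 * 0.25 * 0.707 ~ 45.3.
print(widths[0], heights[0])
```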

@ -15,7 +15,7 @@ from math import sqrt
from tensorflow.core.framework.node_def_pb2 import NodeDef
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
from tf_text_graph_common import tensorMsg, addConstNode
from tf_text_graph_common import *
parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
'SSD model from TensorFlow Object Detection API. '
@ -41,10 +41,6 @@ args = parser.parse_args()
keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu6', 'Placeholder', 'FusedBatchNorm',
'DepthwiseConv2dNative', 'ConcatV2', 'Mul', 'MaxPool', 'AvgPool', 'Identity']
# Nodes attributes that could be removed because they are not used during import.
unusedAttrs = ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim', 'use_cudnn_on_gpu',
'Index', 'Tperm', 'is_training', 'Tpaddings']
# Nodes whose name prefixes should be removed
prefixesToRemove = ('MultipleGridAnchorGenerator/', 'Postprocessor/', 'Preprocessor/')
@ -66,7 +62,6 @@ def getUnconnectedNodes():
unconnected.remove(inp)
return unconnected
removedNodes = []
# Detect unfused batch normalization nodes and fuse them.
def fuse_batch_normalization():
@ -118,41 +113,13 @@ def fuse_batch_normalization():
fuse_batch_normalization()
# Removes Identity nodes
def removeIdentity():
identities = {}
for node in graph_def.node:
if node.op == 'Identity':
identities[node.name] = node.input[0]
graph_def.node.remove(node)
for node in graph_def.node:
for i in range(len(node.input)):
if node.input[i] in identities:
node.input[i] = identities[node.input[i]]
removeIdentity()
# Remove extra nodes and attributes.
for i in reversed(range(len(graph_def.node))):
op = graph_def.node[i].op
name = graph_def.node[i].name
removeIdentity(graph_def)
if (not op in keepOps) or name.startswith(prefixesToRemove):
if op != 'Const':
removedNodes.append(name)
def to_remove(name, op):
return (not op in keepOps) or name.startswith(prefixesToRemove)
del graph_def.node[i]
else:
for attr in unusedAttrs:
if attr in graph_def.node[i].attr:
del graph_def.node[i].attr[attr]
removeUnusedNodesAndAttrs(to_remove, graph_def)
# Remove references to removed nodes except Const nodes.
for node in graph_def.node:
for i in reversed(range(len(node.input))):
if node.input[i] in removedNodes:
del node.input[i]
# Connect input node to the first layer
assert(graph_def.node[0].op == 'Placeholder')
@ -175,8 +142,8 @@ def addConcatNode(name, inputs, axisNodeName):
concat.input.append(axisNodeName)
graph_def.node.extend([concat])
addConstNode('concat/axis_flatten', [-1])
addConstNode('PriorBox/concat/axis', [-2])
addConstNode('concat/axis_flatten', [-1], graph_def)
addConstNode('PriorBox/concat/axis', [-2], graph_def)
for label in ['ClassPredictor', 'BoxEncodingPredictor' if args.box_predictor == 'convolutional' else 'BoxPredictor']:
concatInputs = []

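As the argparse descriptions state, the generated text graph is meant to be paired with the original frozen .pb in cv::dnn::readNetFromTensorflow. A hedged end-to-end sketch using the Mask-RCNN script's flags shown above (file names are placeholders):

```python
# Generate the text graph first, e.g.:
#   python tf_text_graph_mask_rcnn.py --input frozen_inference_graph.pb \
#       --output mask_rcnn.pbtxt --num_classes 90
import cv2 as cv

# Load the frozen weights together with the generated text graph.
net = cv.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'mask_rcnn.pbtxt')
```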