Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Branch: pull/13673/head
Author: Alexander Alekhin (6 years ago)
Commit: 631b246881
68 changed files (changed-line counts in parentheses):

  1. CMakeLists.txt (2)
  2. doc/CMakeLists.txt (1)
  3. doc/Doxyfile.in (8)
  4. doc/py_tutorials/py_feature2d/py_matcher/py_matcher.markdown (28)
  5. modules/calib3d/src/circlesgrid.cpp (15)
  6. modules/calib3d/src/homography_decomp.cpp (8)
  7. modules/calib3d/src/quadsubpix.cpp (5)
  8. modules/calib3d/test/test_cameracalibration.cpp (6)
  9. modules/calib3d/test/test_cameracalibration_artificial.cpp (2)
  10. modules/calib3d/test/test_chessboardgenerator.cpp (16)
  11. modules/calib3d/test/test_homography_decomp.cpp (32)
  12. modules/calib3d/test/test_solvepnp_ransac.cpp (6)
  13. modules/core/include/opencv2/core/cvdef.h (10)
  14. modules/dnn/include/opencv2/dnn/version.hpp (2)
  15. modules/dnn/perf/perf_net.cpp (13)
  16. modules/dnn/src/dnn.cpp (74)
  17. modules/dnn/src/layers/batch_norm_layer.cpp (9)
  18. modules/dnn/src/layers/blank_layer.cpp (6)
  19. modules/dnn/src/layers/concat_layer.cpp (9)
  20. modules/dnn/src/layers/convolution_layer.cpp (102)
  21. modules/dnn/src/layers/crop_layer.cpp (15)
  22. modules/dnn/src/layers/detection_output_layer.cpp (20)
  23. modules/dnn/src/layers/elementwise_layers.cpp (74)
  24. modules/dnn/src/layers/eltwise_layer.cpp (25)
  25. modules/dnn/src/layers/flatten_layer.cpp (13)
  26. modules/dnn/src/layers/fully_connected_layer.cpp (13)
  27. modules/dnn/src/layers/lrn_layer.cpp (12)
  28. modules/dnn/src/layers/mvn_layer.cpp (8)
  29. modules/dnn/src/layers/normalize_bbox_layer.cpp (44)
  30. modules/dnn/src/layers/permute_layer.cpp (6)
  31. modules/dnn/src/layers/pooling_layer.cpp (43)
  32. modules/dnn/src/layers/prior_box_layer.cpp (53)
  33. modules/dnn/src/layers/proposal_layer.cpp (23)
  34. modules/dnn/src/layers/reorg_layer.cpp (6)
  35. modules/dnn/src/layers/reshape_layer.cpp (24)
  36. modules/dnn/src/layers/resize_layer.cpp (41)
  37. modules/dnn/src/layers/scale_layer.cpp (24)
  38. modules/dnn/src/layers/slice_layer.cpp (19)
  39. modules/dnn/src/layers/softmax_layer.cpp (8)
  40. modules/dnn/src/op_inf_engine.cpp (137)
  41. modules/dnn/src/op_inf_engine.hpp (66)
  42. modules/dnn/src/torch/THGeneral.cpp (8)
  43. modules/dnn/test/test_backends.cpp (4)
  44. modules/dnn/test/test_darknet_importer.cpp (2)
  45. modules/dnn/test/test_halide_layers.cpp (3)
  46. modules/dnn/test/test_layers.cpp (4)
  47. modules/dnn/test/test_onnx_importer.cpp (10)
  48. modules/dnn/test/test_tf_importer.cpp (15)
  49. modules/dnn/test/test_torch_importer.cpp (9)
  50. modules/features2d/src/blobdetector.cpp (13)
  51. modules/imgproc/perf/perf_contours.cpp (22)
  52. modules/imgproc/perf/perf_integral.cpp (2)
  53. modules/imgproc/src/filter.cpp (2)
  54. modules/imgproc/src/fixedpoint.inl.hpp (10)
  55. modules/imgproc/src/morph.cpp (571)
  56. modules/imgproc/src/shapedescr.cpp (124)
  57. modules/imgproc/src/sumpixels.avx512_skx.cpp (262)
  58. modules/imgproc/src/sumpixels.cpp (34)
  59. modules/imgproc/src/sumpixels.hpp (25)
  60. modules/imgproc/src/thresh.cpp (452)
  61. modules/imgproc/test/test_color.cpp (10)
  62. modules/objdetect/src/qrcode.cpp (6)
  63. modules/video/src/optflowgf.cpp (22)
  64. samples/dnn/models.yml (12)
  65. samples/dnn/tf_text_graph_common.py (2)
  66. samples/dnn/tf_text_graph_faster_rcnn.py (54)
  67. samples/dnn/tf_text_graph_mask_rcnn.py (2)
  68. samples/dnn/tf_text_graph_ssd.py (158)

@ -577,7 +577,7 @@ else()
# Note: layout differs from OpenCV 3.4
include(GNUInstallDirs)
ocv_update(OPENCV_INCLUDE_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/opencv4")
ocv_update(OPENCV_LIB_INSTALL_PATH "${CMAKE_INSTALL_LIBDIR}${LIB_SUFFIX}")
ocv_update(OPENCV_LIB_INSTALL_PATH "${CMAKE_INSTALL_LIBDIR}")
ocv_update(OPENCV_CONFIG_INSTALL_PATH "${OPENCV_LIB_INSTALL_PATH}/cmake/opencv4")
ocv_update(OPENCV_3P_LIB_INSTALL_PATH "${OPENCV_LIB_INSTALL_PATH}/opencv4/3rdparty")
ocv_update(OPENCV_SAMPLES_SRC_INSTALL_PATH "${CMAKE_INSTALL_DATAROOTDIR}/opencv4/samples")

@ -144,6 +144,7 @@ if(DOXYGEN_FOUND)
string(REPLACE ";" " " CMAKE_DOXYGEN_ENABLED_SECTIONS "${CMAKE_DOXYGEN_ENABLED_SECTIONS}")
# TODO: remove paths_doc from EXAMPLE_PATH after face module tutorials/samples moved to separate folders
string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${example_path} ; ${paths_doc} ; ${paths_sample}")
string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INCLUDE_ROOTS "${paths_include}")
set(CMAKE_DOXYGEN_LAYOUT "${CMAKE_CURRENT_BINARY_DIR}/DoxygenLayout.xml")
set(CMAKE_DOXYGEN_OUTPUT_PATH "doxygen")
set(CMAKE_DOXYGEN_MAIN_REFERENCE "${refs_main}")

@ -22,8 +22,8 @@ ABBREVIATE_BRIEF = "The $name class" \
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = YES
STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@/modules
STRIP_FROM_INC_PATH =
STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@/modules @CMAKE_DOXYGEN_INCLUDE_ROOTS@
STRIP_FROM_INC_PATH = @CMAKE_DOXYGEN_INCLUDE_ROOTS@
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = NO
QT_AUTOBRIEF = NO
@ -72,8 +72,8 @@ INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
SHOW_GROUPED_MEMB_INC = NO
FORCE_LOCAL_INCLUDES = YES
SHOW_GROUPED_MEMB_INC = YES
FORCE_LOCAL_INCLUDES = NO
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = YES

@ -53,8 +53,8 @@ import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
img1 = cv.imread('box.png',0) # queryImage
img2 = cv.imread('box_in_scene.png',0) # trainImage
img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage
# Initiate ORB detector
orb = cv.ORB_create()
@ -79,7 +79,7 @@ matches = bf.match(des1,des2)
matches = sorted(matches, key = lambda x:x.distance)
# Draw first 10 matches.
img3 = cv.drawMatches(img1,kp1,img2,kp2,matches[:10], flags=2)
img3 = cv.drawMatches(img1,kp1,img2,kp2,matches[:10],None,flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
plt.imshow(img3),plt.show()
@endcode
@ -104,13 +104,13 @@ so that we can apply ratio test explained by D.Lowe in his paper.
@code{.py}
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt
img1 = cv.imread('box.png',0) # queryImage
img2 = cv.imread('box_in_scene.png',0) # trainImage
img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage
# Initiate SIFT detector
sift = cv.SIFT()
sift = cv.xfeatures2d.SIFT_create()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
@ -127,7 +127,7 @@ for m,n in matches:
good.append([m])
# cv.drawMatchesKnn expects list of lists as matches.
img3 = cv.drawMatchesKnn(img1,kp1,img2,kp2,good,flags=2)
img3 = cv.drawMatchesKnn(img1,kp1,img2,kp2,good,None,flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
plt.imshow(img3),plt.show()
@endcode
@ -168,13 +168,13 @@ With this information, we are good to go.
@code{.py}
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt
img1 = cv.imread('box.png',0) # queryImage
img2 = cv.imread('box_in_scene.png',0) # trainImage
img1 = cv.imread('box.png',cv.IMREAD_GRAYSCALE) # queryImage
img2 = cv.imread('box_in_scene.png',cv.IMREAD_GRAYSCALE) # trainImage
# Initiate SIFT detector
sift = cv.SIFT()
sift = cv.xfeatures2d.SIFT_create()
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(img1,None)
@ -190,7 +190,7 @@ flann = cv.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)
# Need to draw only good matches, so create a mask
matchesMask = [[0,0] for i in xrange(len(matches))]
matchesMask = [[0,0] for i in range(len(matches))]
# ratio test as per Lowe's paper
for i,(m,n) in enumerate(matches):
@ -200,7 +200,7 @@ for i,(m,n) in enumerate(matches):
draw_params = dict(matchColor = (0,255,0),
singlePointColor = (255,0,0),
matchesMask = matchesMask,
flags = 0)
flags = cv.DrawMatchesFlags_DEFAULT)
img3 = cv.drawMatchesKnn(img1,kp1,img2,kp2,matches,None,**draw_params)

@ -156,7 +156,7 @@ void CirclesGridClusterFinder::findGrid(const std::vector<cv::Point2f> &points,
#endif
std::vector<Point2f> hull2f;
convexHull(Mat(patternPoints), hull2f, false);
convexHull(patternPoints, hull2f, false);
const size_t cornersCount = isAsymmetricGrid ? 6 : 4;
if(hull2f.size() < cornersCount)
return;
@ -407,7 +407,7 @@ void CirclesGridClusterFinder::rectifyPatternPoints(const std::vector<cv::Point2
}
}
Mat homography = findHomography(Mat(sortedCorners), Mat(idealPoints), 0);
Mat homography = findHomography(sortedCorners, idealPoints, 0);
Mat rectifiedPointsMat;
transform(patternPoints, rectifiedPointsMat, homography);
rectifiedPatternPoints.clear();
@ -863,8 +863,8 @@ Mat CirclesGridFinder::rectifyGrid(Size detectedGridSize, const std::vector<Poin
}
}
Mat H = findHomography(Mat(centers), Mat(dstPoints), RANSAC);
//Mat H = findHomography( Mat( corners ), Mat( dstPoints ) );
Mat H = findHomography(centers, dstPoints, RANSAC);
//Mat H = findHomography(corners, dstPoints);
if (H.empty())
{
@ -880,7 +880,7 @@ Mat CirclesGridFinder::rectifyGrid(Size detectedGridSize, const std::vector<Poin
}
Mat dstKeypointsMat;
transform(Mat(srcKeypoints), dstKeypointsMat, H);
transform(srcKeypoints, dstKeypointsMat, H);
std::vector<Point2f> dstKeypoints;
convertPointsFromHomogeneous(dstKeypointsMat, dstKeypoints);
@ -1168,7 +1168,7 @@ void CirclesGridFinder::findBasis(const std::vector<Point2f> &samples, std::vect
}
for (size_t i = 0; i < basis.size(); i++)
{
convexHull(Mat(clusters[i]), hulls[i]);
convexHull(clusters[i], hulls[i]);
}
basisGraphs.resize(basis.size(), Graph(keypoints.size()));
@ -1183,7 +1183,7 @@ void CirclesGridFinder::findBasis(const std::vector<Point2f> &samples, std::vect
for (size_t k = 0; k < hulls.size(); k++)
{
if (pointPolygonTest(Mat(hulls[k]), vec, false) >= 0)
if (pointPolygonTest(hulls[k], vec, false) >= 0)
{
basisGraphs[k].addEdge(i, j);
}
@ -1414,7 +1414,6 @@ void CirclesGridFinder::drawHoles(const Mat &srcImage, Mat &drawImage) const
if (i != holes.size() - 1)
line(drawImage, keypoints[holes[i][j]], keypoints[holes[i + 1][j]], Scalar(255, 0, 0), 2);
//circle(drawImage, keypoints[holes[i][j]], holeRadius, holeColor, holeThickness);
circle(drawImage, keypoints[holes[i][j]], holeRadius, holeColor, holeThickness);
}
}

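Most of the calib3d edits above and below share one pattern: redundant Mat(...) wrappers around std::vector arguments are dropped, because these functions take cv::InputArray, which binds to std::vector<Point2f> directly without constructing an extra Mat header. A minimal standalone sketch of the idiom (hullArea is a made-up helper, not code from this commit):

    #include <opencv2/imgproc.hpp>
    #include <vector>

    // InputArray binds to std::vector directly; the Mat(points) wrapper is unnecessary.
    static double hullArea(const std::vector<cv::Point2f>& points)
    {
        std::vector<cv::Point2f> hull;
        cv::convexHull(points, hull);   // was: convexHull(Mat(points), hull)
        return cv::contourArea(hull);
    }
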
@ -185,6 +185,10 @@ bool HomographyDecompZhang::findMotionFrom_tstar_n(const cv::Vec3d& tstar, const
temp(1, 1) += 1.0;
temp(2, 2) += 1.0;
motion.R = getHnorm() * temp.inv();
if (cv::determinant(motion.R) < 0)
{
motion.R *= -1;
}
motion.t = motion.R * tstar;
motion.n = n;
return passesSameSideOfPlaneConstraint(motion);
@ -312,6 +316,10 @@ void HomographyDecompInria::findRmatFrom_tstar_n(const cv::Vec3d& tstar, const c
0.0, 0.0, 1.0);
R = getHnorm() * (I - (2/v) * tstar_m * n_m.t() );
if (cv::determinant(R) < 0)
{
R *= -1;
}
}
void HomographyDecompInria::decompose(std::vector<CameraMotion>& camMotions)

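The determinant guards added above address issue 4978: since a homography is only defined up to scale, the decomposition can return an improper rotation with det(R) = -1 (a rotation composed with a reflection), while a valid rotation matrix must satisfy det(R) = +1. Negating the matrix restores the proper sign. A self-contained sketch of the same guard:

    #include <opencv2/core.hpp>

    // Flip an improper rotation (det = -1) back to a proper one (det = +1),
    // mirroring the "motion.R *= -1" fix in the patch above.
    static cv::Matx33d fixRotationSign(cv::Matx33d R)
    {
        if (cv::determinant(R) < 0)
            R *= -1.0;
        return R;
    }
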
@ -194,9 +194,8 @@ bool cv::find4QuadCornerSubpix(InputArray _img, InputOutputArray _corners, Size
erode(white_comp, white_comp, Mat(), Point(-1, -1), erode_count);
std::vector<std::vector<Point> > white_contours, black_contours;
std::vector<Vec4i> white_hierarchy, black_hierarchy;
findContours(black_comp, black_contours, black_hierarchy, RETR_LIST, CHAIN_APPROX_SIMPLE);
findContours(white_comp, white_contours, white_hierarchy, RETR_LIST, CHAIN_APPROX_SIMPLE);
findContours(black_comp, black_contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
findContours(white_comp, white_contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
if(black_contours.size() < 5 || white_contours.size() < 5) continue;

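The quadsubpix change switches to the findContours overload that omits the hierarchy output, so the unused white_hierarchy/black_hierarchy vectors no longer need to be filled. A sketch of that overload, assuming an 8-bit binary input (countContours is illustrative):

    #include <opencv2/imgproc.hpp>
    #include <vector>

    static size_t countContours(const cv::Mat& binaryImg)
    {
        cv::Mat work = binaryImg.clone();  // findContours may modify its input in OpenCV 3.x
        std::vector<std::vector<cv::Point> > contours;
        // Overload without the std::vector<cv::Vec4i> hierarchy argument.
        cv::findContours(work, contours, cv::RETR_LIST, cv::CHAIN_APPROX_SIMPLE);
        return contours.size();
    }
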
@ -1408,7 +1408,7 @@ bool CV_StereoCalibrationTest::checkPandROI( int test_case_idx, const Mat& M, co
for( x = 0; x < N; x++ )
pts.push_back(Point2f((float)x*imgsize.width/(N-1), (float)y*imgsize.height/(N-1)));
undistortPoints(Mat(pts), upts, M, D, R, P );
undistortPoints(pts, upts, M, D, R, P );
for( k = 0; k < N*N; k++ )
if( upts[k].x < -imgsize.width*eps || upts[k].x > imgsize.width*(1+eps) ||
upts[k].y < -imgsize.height*eps || upts[k].y > imgsize.height*(1+eps) )
@ -1717,8 +1717,8 @@ void CV_StereoCalibrationTest::run( int )
for( int i = 0, k = 0; i < nframes; i++ )
{
vector<Point2f> temp[2];
undistortPoints(Mat(imgpt1[i]), temp[0], M1, D1, R1, P1);
undistortPoints(Mat(imgpt2[i]), temp[1], M2, D2, R2, P2);
undistortPoints(imgpt1[i], temp[0], M1, D1, R1, P1);
undistortPoints(imgpt2[i], temp[1], M2, D2, R2, P2);
for( int j = 0; j < npoints; j++, k++ )
{

@ -353,7 +353,7 @@ protected:
rvecs_spnp.resize(brdsNum);
tvecs_spnp.resize(brdsNum);
for(size_t i = 0; i < brdsNum; ++i)
solvePnP(Mat(objectPoints[i]), Mat(imagePoints[i]), camMat, distCoeffs, rvecs_spnp[i], tvecs_spnp[i]);
solvePnP(objectPoints[i], imagePoints[i], camMat, distCoeffs, rvecs_spnp[i], tvecs_spnp[i]);
compareShiftVecs(tvecs_exp, tvecs_spnp);
compareRotationVecs(rvecs_exp, rvecs_spnp);

@ -126,10 +126,10 @@ Mat ChessBoardGenerator::generateChessBoard(const Mat& bg, const Mat& camMat, co
generateEdge(p3, p4, pts_square3d);
generateEdge(p4, p1, pts_square3d);
projectPoints(Mat(pts_square3d), rvec, tvec, camMat, distCoeffs, pts_square2d);
projectPoints(pts_square3d, rvec, tvec, camMat, distCoeffs, pts_square2d);
squares_black.resize(squares_black.size() + 1);
vector<Point2f> temp;
approxPolyDP(Mat(pts_square2d), temp, 1.0, true);
approxPolyDP(pts_square2d, temp, 1.0, true);
transform(temp.begin(), temp.end(), back_inserter(squares_black.back()), Mult(rendererResolutionMultiplier));
}
@ -139,7 +139,7 @@ Mat ChessBoardGenerator::generateChessBoard(const Mat& bg, const Mat& camMat, co
for(int i = 0; i < patternSize.width - 1; ++i)
corners3d.push_back(zero + (i + 1) * sqWidth * pb1 + (j + 1) * sqHeight * pb2);
corners.clear();
projectPoints(Mat(corners3d), rvec, tvec, camMat, distCoeffs, corners);
projectPoints(corners3d, rvec, tvec, camMat, distCoeffs, corners);
vector<Point3f> whole3d;
vector<Point2f> whole2d;
@ -147,9 +147,9 @@ Mat ChessBoardGenerator::generateChessBoard(const Mat& bg, const Mat& camMat, co
generateEdge(whole[1], whole[2], whole3d);
generateEdge(whole[2], whole[3], whole3d);
generateEdge(whole[3], whole[0], whole3d);
projectPoints(Mat(whole3d), rvec, tvec, camMat, distCoeffs, whole2d);
projectPoints(whole3d, rvec, tvec, camMat, distCoeffs, whole2d);
vector<Point2f> temp_whole2d;
approxPolyDP(Mat(whole2d), temp_whole2d, 1.0, true);
approxPolyDP(whole2d, temp_whole2d, 1.0, true);
vector< vector<Point > > whole_contour(1);
transform(temp_whole2d.begin(), temp_whole2d.end(),
@ -213,7 +213,7 @@ Mat ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const Mat
pts3d[3] = p - pb1 * cbHalfWidthEx + cbHalfHeightEx * pb2;
/* can remake with better perf */
projectPoints(Mat(pts3d), rvec, tvec, camMat, distCoeffs, pts2d);
projectPoints(pts3d, rvec, tvec, camMat, distCoeffs, pts2d);
bool inrect1 = pts2d[0].x < bg.cols && pts2d[0].y < bg.rows && pts2d[0].x > 0 && pts2d[0].y > 0;
bool inrect2 = pts2d[1].x < bg.cols && pts2d[1].y < bg.rows && pts2d[1].x > 0 && pts2d[1].y > 0;
@ -278,7 +278,7 @@ Mat ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const Mat
pts3d[3] = p - pb1 * cbHalfWidthEx + cbHalfHeightEx * pb2;
/* can remake with better perf */
projectPoints(Mat(pts3d), rvec, tvec, camMat, distCoeffs, pts2d);
projectPoints(pts3d, rvec, tvec, camMat, distCoeffs, pts2d);
bool inrect1 = pts2d[0].x < bg.cols && pts2d[0].y < bg.rows && pts2d[0].x > 0 && pts2d[0].y > 0;
bool inrect2 = pts2d[1].x < bg.cols && pts2d[1].y < bg.rows && pts2d[1].x > 0 && pts2d[1].y > 0;
@ -320,7 +320,7 @@ Mat ChessBoardGenerator::operator ()(const Mat& bg, const Mat& camMat, const Mat
pts3d[3] = p - pb1 * cbHalfWidthEx + cbHalfHeightEx * pb2;
/* can remake with better perf */
projectPoints(Mat(pts3d), rvec, tvec, camMat, distCoeffs, pts2d);
projectPoints(pts3d, rvec, tvec, camMat, distCoeffs, pts2d);
Point3f zero = p - pb1 * cbHalfWidth - cbHalfHeight * pb2;

@ -134,4 +134,36 @@ private:
TEST(Calib3d_DecomposeHomography, regression) { CV_HomographyDecompTest test; test.safe_run(); }
TEST(Calib3d_DecomposeHomography, issue_4978)
{
Matx33d K(
1.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 1.0
);
Matx33d H(
-0.102896, 0.270191, -0.0031153,
0.0406387, 1.19569, -0.0120456,
0.445351, 0.0410889, 1
);
vector<Mat> rotations;
vector<Mat> translations;
vector<Mat> normals;
decomposeHomographyMat(H, K, rotations, translations, normals);
ASSERT_GT(rotations.size(), (size_t)0u);
for (size_t i = 0; i < rotations.size(); i++)
{
// check: det(R) = 1
EXPECT_TRUE(std::fabs(cv::determinant(rotations[i]) - 1.0) < 0.01)
<< "R: det=" << cv::determinant(rotations[0]) << std::endl << rotations[i] << std::endl
<< "T:" << std::endl << translations[i] << std::endl;
}
}
}} // namespace

@ -124,7 +124,7 @@ protected:
vector<Point2f> projectedPoints;
projectedPoints.resize(points.size());
projectPoints(Mat(points), trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
projectPoints(points, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
for (size_t i = 0; i < projectedPoints.size(); i++)
{
if (i % 20 == 0)
@ -241,7 +241,7 @@ protected:
vector<Point2f> projectedPoints;
projectedPoints.resize(opoints.size());
projectPoints(Mat(opoints), trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
projectPoints(opoints, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
bool isEstimateSuccess = solvePnP(opoints, projectedPoints, intrinsics, distCoeffs, rvec, tvec, false, method);
if (isEstimateSuccess == false)
@ -291,7 +291,7 @@ class CV_solveP3P_Test : public CV_solvePnPRansac_Test
vector<Point2f> projectedPoints;
projectedPoints.resize(opoints.size());
projectPoints(Mat(opoints), trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
projectPoints(opoints, trueRvec, trueTvec, intrinsics, distCoeffs, projectedPoints);
int num_of_solutions = solveP3P(opoints, projectedPoints, intrinsics, distCoeffs, rvecs, tvecs, method);
if (num_of_solutions != (int) rvecs.size() || num_of_solutions != (int) tvecs.size() || num_of_solutions == 0)

@ -186,6 +186,16 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
# endif
#endif
#ifndef CV_ALWAYS_INLINE
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define CV_ALWAYS_INLINE __forceinline
#else
#define CV_ALWAYS_INLINE inline
#endif
#endif
#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
# define CV_ENABLE_UNROLLED 0
#else

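The new CV_ALWAYS_INLINE macro requests inlining even when the compiler's heuristics (or a debug build) would decline: __attribute__((always_inline)) on GCC >= 3.1, __forceinline on MSVC, and plain inline elsewhere. The intended use is small hot-path helpers, for example (illustrative helper, not from the commit):

    #include <opencv2/core/cvdef.h>

    // A tiny per-pixel helper; the macro is a strong request, not a guarantee.
    CV_ALWAYS_INLINE int clampToByte(int v)
    {
        return v < 0 ? 0 : (v > 255 ? 255 : v);
    }
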
@ -6,7 +6,7 @@
#define OPENCV_DNN_VERSION_HPP
/// Use with major OpenCV version only.
#define OPENCV_DNN_API_VERSION 20181221
#define OPENCV_DNN_API_VERSION 20190122
#if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_INLINE_NS
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)

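The OPENCV_DNN_API_VERSION bump does more than record a date: through CV__DNN_INLINE_NS it renames the inline namespace that wraps all dnn symbols (dnn4_v20181221 becomes dnn4_v20190122), so code compiled against an incompatible dnn ABI fails at link time rather than misbehaving at run time. The mechanism, reduced to a library-independent sketch:

    #define API_VERSION 20190122
    #define CAT_(a, b) a##b
    #define CAT(a, b)  CAT_(a, b)

    namespace mylib {
    inline namespace CAT(v, API_VERSION) {   // mangles as mylib::v20190122::run
    void run();
    }
    }
    // Callers still write mylib::run(); the inline namespace is transparent
    // in source but versioned in the ABI.
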
@ -157,8 +157,7 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
PERF_TEST_P_(DNNTestNetwork, DenseNet_121)
{
if (backend == DNN_BACKEND_HALIDE ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)))
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", "",
Mat(cv::Size(224, 224), CV_32FC3));
@ -211,8 +210,7 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
PERF_TEST_P_(DNNTestNetwork, YOLOv3)
{
if (backend == DNN_BACKEND_HALIDE ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png", false));
Mat inp;
@ -222,8 +220,11 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
PERF_TEST_P_(DNNTestNetwork, EAST_text_detection)
{
if (backend == DNN_BACKEND_HALIDE ||
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
if (backend == DNN_BACKEND_HALIDE
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
|| (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
#endif
)
throw SkipTestException("");
processNet("dnn/frozen_east_text_detection.pb", "", "", Mat(cv::Size(320, 320), CV_32FC3));
}

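The perf-test edits above replace blanket skips with version-gated ones: INF_ENGINE_RELEASE is a plain integer (2018030000 reads as 2018 R3), so an ordinary comparison scopes a workaround to the Inference Engine versions that actually need it, as the EAST_text_detection hunk shows. The pattern in isolation (backend, target, and SkipTestException come from the surrounding test fixture):

    // Skip only on Inference Engine builds older than 2018 R3.
    #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE < 2018030000
        if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
            throw SkipTestException("MYRIAD path requires IE 2018 R3 or newer");
    #endif
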
@ -707,12 +707,6 @@ struct DataLayer : public Layer
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
CV_CheckEQ(inputsData.size(), (size_t)1, "");
CV_CheckEQ(inputsData[0].dims, 4, "");
const size_t numChannels = inputsData[0].size[1];
@ -723,7 +717,6 @@ struct DataLayer : public Layer
{numChannels});
weights->allocate();
weights->set(std::vector<float>(numChannels, scaleFactors[0]));
ieLayer->_weights = weights;
// Mean subtraction
auto biases = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
@ -735,8 +728,21 @@ struct DataLayer : public Layer
biasesVec[i] = -means[0][i] * scaleFactors[0];
}
biases->set(biasesVec);
ieLayer->_biases = biases;
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ScaleShiftLayer ieLayer(name);
ieLayer.setWeights(weights);
ieLayer.setBiases(biases);
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
ieLayer->_weights = weights;
ieLayer->_biases = biases;
#endif
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
@ -1480,7 +1486,11 @@ struct Net::Impl
if (layerNet != ieInpNode->net)
{
// layerNet is empty or nodes are from different graphs.
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
ieInpNode->net->addOutput(ieInpNode->layer.getName());
#else
ieInpNode->net->addOutput(ieInpNode->layer->name);
#endif
}
}
}
@ -1590,7 +1600,7 @@ struct Net::Impl
// Build Inference Engine networks from sets of layers that support this
// backend. Split a whole model on several Inference Engine networks if
// some of layers is not implemented.
// some of layers are not implemented.
// Set of all input and output blobs wrappers for current network.
std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
@ -1606,7 +1616,7 @@ struct Net::Impl
{
addInfEngineNetOutputs(ld);
net = Ptr<InfEngineBackendNet>();
netBlobsWrappers.clear();
netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in #ifdef.
layer->preferableTarget = DNN_TARGET_CPU;
continue;
}
@ -1624,12 +1634,13 @@ struct Net::Impl
if (ieInpNode->net != net)
{
net = Ptr<InfEngineBackendNet>();
netBlobsWrappers.clear();
netBlobsWrappers.clear(); // Not used for the R5 release, but we don't wrap it in #ifdef.
break;
}
}
}
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
// The same blobs wrappers cannot be shared between two Inference Engine
// networks because of explicit references between layers and blobs.
// So we need to rewrap all the external blobs.
@ -1646,6 +1657,7 @@ struct Net::Impl
ld.inputBlobsWrappers[i] = it->second;
}
netBlobsWrappers[LayerPin(ld.id, 0)] = ld.outputBlobsWrappers[0];
#endif // IE < R5
Ptr<BackendNode> node;
if (!net.empty())
@ -1676,6 +1688,40 @@ struct Net::Impl
CV_Assert(!ieNode.empty());
ieNode->net = net;
// Convert weights in FP16 for specific targets.
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
preferableTarget == DNN_TARGET_MYRIAD ||
preferableTarget == DNN_TARGET_FPGA) && !fused)
{
auto& blobs = ieNode->layer.getConstantData();
if (blobs.empty())
{
// In case of a non-weightable layer we have to specify
// its precision by adding a dummy blob.
auto blob = InferenceEngine::make_shared_blob<int16_t>(
InferenceEngine::Precision::FP16,
InferenceEngine::Layout::C, {1});
blob->allocate();
blobs[""] = blob;
}
else
{
for (auto& it : blobs)
it.second = convertFp16(std::const_pointer_cast<InferenceEngine::Blob>(it.second));
}
}
if (!fused)
net->addLayer(ieNode->layer);
net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName());
net->addBlobs(ld.inputBlobsWrappers);
net->addBlobs(ld.outputBlobsWrappers);
addInfEngineNetOutputs(ld);
#else // IE >= R5
auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
if ((preferableTarget == DNN_TARGET_OPENCL_FP16 ||
preferableTarget == DNN_TARGET_MYRIAD ||
@ -1713,10 +1759,10 @@ struct Net::Impl
if (!fused)
net->addLayer(ieNode->layer);
addInfEngineNetOutputs(ld);
#endif // IE >= R5
}
// Initialize all networks.
std::set<InfEngineBackendNet> initializedNets;
for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
{
LayerData &ld = it->second;
@ -2622,7 +2668,11 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
Net cvNet;
cvNet.setInputsNames(inputsNames);
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(InferenceEngine::Builder::Layer("")));
#else
Ptr<InfEngineBackendNode> backendNode(new InfEngineBackendNode(0));
#endif
backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
for (auto& it : ieNet.getOutputsInfo())
{

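The dnn.cpp hunks above and the per-layer hunks below are all instances of one migration: Inference Engine 2018 R5 introduced the Builder API, and this merge makes every layer's initInfEngine construct the new InferenceEngine::Builder::*Layer objects on R5+ while keeping the legacy LayerParams/CNNLayer path compiled in for older releases. The recurring skeleton looks like this (ReLU chosen as the simplest case; name is the Layer member):

    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
    #ifdef HAVE_INF_ENGINE
    #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
        // New Builder API: typed layer objects with fluent setters.
        InferenceEngine::Builder::ReLULayer ieLayer(name);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    #else
        // Legacy API: untyped LayerParams plus a concrete CNNLayer subclass.
        InferenceEngine::LayerParams lp;
        lp.name = name;
        lp.type = "ReLU";
        lp.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
    #endif
    #endif // HAVE_INF_ENGINE
        return Ptr<BackendNode>();
    }

The R5 branch in dnn.cpp also converts each layer's constant blobs with convertFp16 for the OPENCL_FP16/MYRIAD/FPGA targets, adding a dummy FP16 blob when a layer has no weights so its precision is still recorded.
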
@ -349,6 +349,14 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ScaleShiftLayer ieLayer(name);
const size_t numChannels = weights_.total();
ieLayer.setWeights(wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C));
ieLayer.setBiases(wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
@ -360,6 +368,7 @@ public:
ieLayer->_biases = wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -110,6 +110,11 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::SplitLayer ieLayer(name);
ieLayer.setOutputPorts({InferenceEngine::Port()});
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
CV_Assert(!input->dims.empty());
@ -123,6 +128,7 @@ public:
ieLayer->params["out_sizes"] = format("%d", (int)input->dims[0]);
#endif
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -313,6 +313,14 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::Builder::ConcatLayer ieLayer(name);
ieLayer.setAxis(clamp(axis, input->dims.size()));
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::LayerParams lp;
lp.name = name;
@ -321,6 +329,7 @@ public:
std::shared_ptr<InferenceEngine::ConcatLayer> ieLayer(new InferenceEngine::ConcatLayer(lp));
ieLayer->_axis = clamp(axis, input->dims.size());
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -521,6 +521,54 @@ public:
const int inpGroupCn = blobs[0].size[1];
const int group = inpCn / inpGroupCn;
auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
if (newWeightAndBias)
{
if (weightsMat.isContinuous())
{
Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
ieWeights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW);
}
else
{
ieWeights = InferenceEngine::make_shared_blob<float>(
InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
ieWeights->dims());
ieWeights->allocate();
Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, outCn);
Mat fusedWeights = weightsMat.colRange(0, newWeights.cols);
fusedWeights.copyTo(newWeights);
}
}
InferenceEngine::Blob::Ptr ieBiases;
if (hasBias() || fusedBias)
{
Mat biasesMat({outCn}, CV_32F, &biasvec[0]);
ieBiases = wrapToInfEngineBlob(biasesMat, {(size_t)outCn}, InferenceEngine::Layout::C);
}
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ConvolutionLayer ieLayer(name);
ieLayer.setKernel({kernel.height, kernel.width});
ieLayer.setStrides({stride.height, stride.width});
ieLayer.setDilation({dilation.height, dilation.width});
ieLayer.setPaddingsBegin({pad.height, pad.width});
ieLayer.setPaddingsEnd({pad.height, pad.width});
ieLayer.setGroup(group);
ieLayer.setOutDepth(outCn);
ieLayer.setWeights(ieWeights);
if (ieBiases)
ieLayer.setBiases(ieBiases);
InferenceEngine::Builder::Layer l = ieLayer;
if (!padMode.empty())
l.getParameters()["auto_pad"] = padMode == "VALID" ? std::string("valid") : std::string("same_upper");
return Ptr<BackendNode>(new InfEngineBackendNode(l));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Convolution";
@ -557,32 +605,11 @@ public:
ieLayer->_out_depth = outCn;
ieLayer->_group = group;
ieLayer->_weights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
if (newWeightAndBias)
{
if (weightsMat.isContinuous())
{
Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
ieLayer->_weights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW);
}
else
{
ieLayer->_weights = InferenceEngine::make_shared_blob<float>(
InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
ieLayer->_weights->dims());
ieLayer->_weights->allocate();
Mat newWeights = infEngineBlobToMat(ieLayer->_weights).reshape(1, outCn);
Mat fusedWeights = weightsMat.colRange(0, newWeights.cols);
fusedWeights.copyTo(newWeights);
}
}
if (hasBias() || fusedBias)
{
Mat biasesMat({outCn}, CV_32F, &biasvec[0]);
ieLayer->_biases = wrapToInfEngineBlob(biasesMat, {(size_t)outCn}, InferenceEngine::Layout::C);
}
ieLayer->_weights = ieWeights;
if (ieBiases)
ieLayer->_biases = ieBiases;
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
@ -1193,6 +1220,9 @@ public:
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
if (INF_ENGINE_RELEASE == 2018050000 && (adjustPad.height || adjustPad.width))
return false;
const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
const int group = numOutput / outGroupCn;
if (group != 1)
@ -1747,6 +1777,27 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
const int group = numOutput / outGroupCn;
InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);
ieLayer.setKernel({kernel.height, kernel.width});
ieLayer.setStrides({stride.height, stride.width});
ieLayer.setDilation({dilation.height, dilation.width});
ieLayer.setPaddingsBegin({pad.height, pad.width});
ieLayer.setPaddingsEnd({pad.height, pad.width});
ieLayer.setGroup(group);
ieLayer.setOutDepth(numOutput);
ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW));
if (hasBias())
{
ieLayer.setBiases(wrapToInfEngineBlob(blobs[1], {(size_t)numOutput}, InferenceEngine::Layout::C));
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW layout
const int group = numOutput / outGroupCn;
@ -1786,6 +1837,7 @@ public:
ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {(size_t)numOutput}, InferenceEngine::Layout::C);
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -67,8 +67,12 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE && crop_ranges.size() == 4);
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
return INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5) && crop_ranges.size() == 4;
else
#endif
return backendId == DNN_BACKEND_OPENCV;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
@ -145,9 +149,10 @@ public:
input(&crop_ranges[0]).copyTo(outputs[0]);
}
#ifdef HAVE_INF_ENGINE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Crop";
@ -181,9 +186,11 @@ public:
ieLayer->dim.push_back(crop_ranges[3].end - crop_ranges[3].start);
#endif
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
#else
return Ptr<BackendNode>();
#endif // IE < R5
}
#endif
std::vector<Range> crop_ranges;
};

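Crop (above) and Slice (at the end of this diff) take the opposite route: the R5 Builder API has no equivalent layer for them yet, so supportBackend now declines DNN_BACKEND_INFERENCE_ENGINE on R5+ and the network falls back to the OpenCV implementation. A condensed form of the guard:

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
    #ifdef HAVE_INF_ENGINE
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
            return INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5) && crop_ranges.size() == 4;
    #endif
        return backendId == DNN_BACKEND_OPENCV;
    }
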
@ -939,6 +939,25 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::DetectionOutputLayer ieLayer(name);
ieLayer.setNumClasses(_numClasses);
ieLayer.setShareLocation(_shareLocation);
ieLayer.setBackgroudLabelId(_backgroundLabelId);
ieLayer.setNMSThreshold(_nmsThreshold);
ieLayer.setTopK(_topK);
ieLayer.setKeepTopK(_keepTopK);
ieLayer.setConfidenceThreshold(_confidenceThreshold);
ieLayer.setVariantEncodedInTarget(_varianceEncodedInTarget);
ieLayer.setCodeType("caffe.PriorBoxParameter." + _codeType);
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(3));
InferenceEngine::Builder::Layer l = ieLayer;
l.getParameters()["eta"] = std::string("1.0");
return Ptr<BackendNode>(new InfEngineBackendNode(l));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "DetectionOutput";
@ -956,6 +975,7 @@ public:
ieLayer->params["variance_encoded_in_target"] = _varianceEncodedInTarget ? "1" : "0";
ieLayer->params["code_type"] = "caffe.PriorBoxParameter." + _codeType;
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -153,10 +153,16 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer ieLayer = func.initInfEngineBuilderAPI();
ieLayer.setName(this->name);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = this->name;
lp.precision = InferenceEngine::Precision::FP32;
return Ptr<BackendNode>(new InfEngineBackendNode(func.initInfEngine(lp)));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
@ -355,6 +361,12 @@ struct ReLUFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(slope);
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "ReLU";
@ -363,6 +375,7 @@ struct ReLUFunctor
ieLayer->params["negative_slope"] = format("%f", slope);
return ieLayer;
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -472,6 +485,12 @@ struct ReLU6Functor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
return InferenceEngine::Builder::ClampLayer("").setMinValue(minValue).setMaxValue(maxValue);
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "Clamp";
@ -482,6 +501,7 @@ struct ReLU6Functor
ieLayer->params["max"] = format("%f", maxValue);
return ieLayer;
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -558,12 +578,19 @@ struct TanHFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
return InferenceEngine::Builder::TanHLayer("");
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "TanH";
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
return ieLayer;
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -640,12 +667,19 @@ struct SigmoidFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
return InferenceEngine::Builder::SigmoidLayer("");
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "Sigmoid";
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
return ieLayer;
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -724,11 +758,18 @@ struct ELUFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
return InferenceEngine::Builder::ELULayer("");
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "ELU";
return InferenceEngine::CNNLayerPtr(new InferenceEngine::CNNLayer(lp));
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -805,6 +846,12 @@ struct AbsValFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-1);
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "ReLU";
@ -813,6 +860,7 @@ struct AbsValFunctor
ieLayer->params["negative_slope"] = "-1.0";
return ieLayer;
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -868,11 +916,18 @@ struct BNLLFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
CV_Error(Error::StsNotImplemented, "");
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
CV_Error(Error::StsNotImplemented, "BNLL");
return InferenceEngine::CNNLayerPtr();
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -985,6 +1040,14 @@ struct PowerFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
return InferenceEngine::Builder::PowerLayer("").setPower(power)
.setScale(scale)
.setShift(shift);
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
if (power == 1.0f && scale == 1.0f && shift == 0.0f)
@ -1004,6 +1067,7 @@ struct PowerFunctor
return ieLayer;
}
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN
@ -1143,6 +1207,15 @@ struct ChannelsPReLUFunctor
#endif // HAVE_HALIDE
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer initInfEngineBuilderAPI()
{
InferenceEngine::Builder::PReLULayer ieLayer("");
const size_t numChannels = scale.total();
ieLayer.setWeights(wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C));
return ieLayer;
}
#else
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
lp.type = "PReLU";
@ -1151,6 +1224,7 @@ struct ChannelsPReLUFunctor
ieLayer->_weights = wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C);
return ieLayer;
}
#endif
#endif // HAVE_INF_ENGINE
#ifdef HAVE_VULKAN

@ -99,7 +99,7 @@ public:
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_HALIDE ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE &&
(preferableTarget != DNN_TARGET_MYRIAD || coeffs.empty()));
(preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()));
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
@ -420,9 +420,29 @@ public:
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::EltwiseLayer ieLayer(name);
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(inputs.size()));
if (op == SUM)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM);
else if (op == PROD)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL);
else if (op == MAX)
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
else
CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
InferenceEngine::Builder::Layer l = ieLayer;
if (!coeffs.empty())
l.getParameters()["coeff"] = coeffs;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Eltwise";
@ -438,6 +458,7 @@ public:
else
CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -152,9 +152,19 @@ public:
}
}
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Flatten");
ieLayer.getParameters()["axis"] = _startAxis;
ieLayer.getParameters()["end_axis"] = _endAxis;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Flatten";
@ -163,6 +173,7 @@ public:
ieLayer->params["axis"] = format("%d", _startAxis);
ieLayer->params["end_axis"] = format("%d", _endAxis);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -442,6 +442,18 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::FullyConnectedLayer ieLayer(name);
const int outNum = blobs[0].size[0];
ieLayer.setOutputNum(outNum);
ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW));
if (blobs.size() > 1)
ieLayer.setBiases(wrapToInfEngineBlob(blobs[1], {(size_t)outNum}, InferenceEngine::Layout::C));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "FullyConnected";
@ -456,6 +468,7 @@ public:
if (blobs.size() > 1)
ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {(size_t)ieLayer->_out_num}, InferenceEngine::Layout::C);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -393,6 +393,17 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::NormLayer ieLayer(name);
ieLayer.setSize(size);
ieLayer.setAlpha(alpha);
ieLayer.setBeta(beta);
ieLayer.setAcrossMaps(type == CHANNEL_NRM);
InferenceEngine::Builder::Layer l = ieLayer;
l.getParameters()["k"] = bias;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Norm";
@ -405,6 +416,7 @@ public:
ieLayer->_alpha = alpha;
ieLayer->_isAcrossMaps = (type == CHANNEL_NRM);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -371,6 +371,13 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::MVNLayer ieLayer(name);
ieLayer.setAcrossChannels(acrossChannels);
ieLayer.setNormalize(normVariance);
ieLayer.setEpsilon(eps);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "MVN";
@ -380,6 +387,7 @@ public:
ieLayer->params["normalize_variance"] = normVariance ? "1" : "0";
ieLayer->params["eps"] = format("%f", eps);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -264,6 +264,49 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
if (input->dims.size() == 4)
{
InferenceEngine::Builder::NormalizeLayer ieLayer(name);
ieLayer.setChannelShared(false);
ieLayer.setAcrossMaps(acrossSpatial);
ieLayer.setEpsilon(epsilon);
InferenceEngine::Builder::Layer l = ieLayer;
const int numChannels = input->dims[2]; // NOTE: input->dims are reversed (whcn)
if (blobs.empty())
{
auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
InferenceEngine::Layout::C,
{(size_t)numChannels});
weights->allocate();
std::vector<float> ones(numChannels, 1);
weights->set(ones);
l.addConstantData("weights", weights);
l.getParameters()["channel_shared"] = false;
}
else
{
CV_Assert(numChannels == blobs[0].total());
l.addConstantData("weights", wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C));
l.getParameters()["channel_shared"] = blobs[0].total() == 1;
}
l.getParameters()["across_spatial"] = acrossSpatial;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
else
{
InferenceEngine::Builder::GRNLayer ieLayer(name);
ieLayer.setBeta(epsilon);
InferenceEngine::Builder::Layer l = ieLayer;
l.getParameters()["bias"] = epsilon;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#else
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::LayerParams lp;
@ -307,6 +350,7 @@ public:
ieLayer->params["bias"] = format("%f", epsilon);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -385,6 +385,11 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::PermuteLayer ieLayer(name);
ieLayer.setOrder(_order);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Permute";
@ -397,6 +402,7 @@ public:
ieLayer->params["order"] += format(",%zu", _order[i]);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -295,6 +295,48 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
if (type == MAX || type == AVE)
{
InferenceEngine::Builder::PoolingLayer ieLayer(name);
ieLayer.setKernel({kernel.height, kernel.width});
ieLayer.setStrides({stride.height, stride.width});
ieLayer.setPaddingsBegin({pad_t, pad_l});
ieLayer.setPaddingsEnd({pad_b, pad_r});
ieLayer.setPoolingType(type == MAX ?
InferenceEngine::Builder::PoolingLayer::PoolingType::MAX :
InferenceEngine::Builder::PoolingLayer::PoolingType::AVG);
ieLayer.setRoundingType(ceilMode ?
InferenceEngine::Builder::PoolingLayer::RoundingType::CEIL :
InferenceEngine::Builder::PoolingLayer::RoundingType::FLOOR);
ieLayer.setExcludePad(type == AVE && padMode == "SAME");
InferenceEngine::Builder::Layer l = ieLayer;
if (!padMode.empty())
l.getParameters()["auto_pad"] = padMode == "VALID" ? std::string("valid") : std::string("same_upper");
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
else if (type == ROI)
{
InferenceEngine::Builder::ROIPoolingLayer ieLayer(name);
ieLayer.setSpatialScale(spatialScale);
ieLayer.setPooled({pooledSize.height, pooledSize.width});
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
else if (type == PSROI)
{
InferenceEngine::Builder::PSROIPoolingLayer ieLayer(name);
ieLayer.setSpatialScale(spatialScale);
ieLayer.setOutputDim(psRoiOutChannels);
ieLayer.setGroupSize(pooledSize.width);
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(2));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
}
else
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
return Ptr<BackendNode>();
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.precision = InferenceEngine::Precision::FP32;
@ -353,6 +395,7 @@ public:
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -498,6 +498,58 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
if (_explicitSizes)
{
InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name);
CV_Assert(_stepX == _stepY);
ieLayer.setStep(_stepX);
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
ieLayer.setOffset(_offsetsX[0]);
ieLayer.setClip(_clip);
ieLayer.setFlip(false); // We already flipped aspect ratios.
InferenceEngine::Builder::Layer l = ieLayer;
CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty());
CV_Assert(_boxWidths.size() == _boxHeights.size());
l.getParameters()["width"] = _boxWidths;
l.getParameters()["height"] = _boxHeights;
l.getParameters()["variance"] = _variance;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
else
{
InferenceEngine::Builder::PriorBoxLayer ieLayer(name);
CV_Assert(!_explicitSizes);
ieLayer.setMinSize(_minSize);
if (_maxSize > 0)
ieLayer.setMaxSize(_maxSize);
CV_Assert(_stepX == _stepY);
ieLayer.setStep(_stepX);
CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], "");
ieLayer.setOffset(_offsetsX[0]);
ieLayer.setClip(_clip);
ieLayer.setFlip(false); // We already flipped aspect ratios.
InferenceEngine::Builder::Layer l = ieLayer;
if (!_aspectRatios.empty())
{
l.getParameters()["aspect_ratio"] = _aspectRatios;
}
CV_Assert(!_variance.empty());
l.getParameters()["variance"] = _variance;
return Ptr<BackendNode>(new InfEngineBackendNode(l));
}
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = _explicitSizes ? "PriorBoxClustered" : "PriorBox";
@ -553,6 +605,7 @@ public:
ieLayer->params["offset"] = format("%f", _offsetsX[0]);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -328,6 +328,28 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ProposalLayer ieLayer(name);
ieLayer.setBaseSize(baseSize);
ieLayer.setFeatStride(featStride);
ieLayer.setMinSize(16);
ieLayer.setNMSThresh(nmsThreshold);
ieLayer.setPostNMSTopN(keepTopAfterNMS);
ieLayer.setPreNMSTopN(keepTopBeforeNMS);
std::vector<float> scalesVec(scales.size());
for (int i = 0; i < scales.size(); ++i)
scalesVec[i] = scales.get<float>(i);
ieLayer.setScale(scalesVec);
std::vector<float> ratiosVec(ratios.size());
for (int i = 0; i < ratios.size(); ++i)
ratiosVec[i] = ratios.get<float>(i);
ieLayer.setRatio(ratiosVec);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Proposal";
@ -353,6 +375,7 @@ public:
ieLayer->params["scale"] += format(",%f", scales.get<float>(i));
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -181,6 +181,11 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ReorgYoloLayer ieLayer(name);
ieLayer.setStride(reorgStride);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ReorgYolo";
@ -188,6 +193,7 @@ public:
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
ieLayer->params["stride"] = format("%d", reorgStride);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@ -203,6 +203,17 @@ public:
return true;
}
void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
{
std::vector<Mat> outputs;
outputs_arr.getMatVector(outputs);
CV_Assert(!outputs.empty());
outShapes.resize(outputs.size());
for (int i = 0; i < outputs.size(); ++i)
outShapes[i] = shape(outputs[i]);
}
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
std::vector<UMat> inputs;
@ -218,8 +229,7 @@ public:
void *dst_handle = outputs[i].handle(ACCESS_WRITE);
if (src_handle != dst_handle)
{
MatShape outShape = shape(outputs[i]);
UMat umat = srcBlob.reshape(1, (int)outShape.size(), &outShape[0]);
UMat umat = srcBlob.reshape(1, (int)outShapes[i].size(), &outShapes[i][0]);
umat.copyTo(outputs[i]);
}
}
@ -250,6 +260,12 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ReshapeLayer ieLayer(name);
CV_Assert(outShapes.size() == 1);
ieLayer.setDims(outShapes[0]);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Reshape";
@ -265,9 +281,13 @@ public:
ieLayer->shape = std::vector<int>(shapeSrc->dims.rbegin(), shapeSrc->dims.rend());
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
private:
std::vector<MatShape> outShapes;
};
Ptr<ReshapeLayer> ReshapeLayer::create(const LayerParams& params)

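ReshapeLayer now snapshots its negotiated output shapes in finalize(): the OpenCL forward path reuses them for the zero-copy reshape instead of querying shape(outputs[i]) on every call, and the R5 branch needs a concrete shape to pass to Builder::ReshapeLayer::setDims. The cache-in-finalize idiom on its own (MatShape is std::vector<int>):

    // Record the shapes once, when the network geometry is finalized.
    void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);
        outShapes.resize(outputs.size());
        for (size_t i = 0; i < outputs.size(); ++i)
            outShapes[i] = shape(outputs[i]);
    }
    std::vector<MatShape> outShapes;  // consumed by forward_ocl and initInfEngine
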
@ -163,6 +163,33 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
if (interpolation == "nearest")
{
ieLayer.setType("Resample");
ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST");
ieLayer.getParameters()["antialias"] = false;
if (scaleWidth != scaleHeight)
CV_Error(Error::StsNotImplemented, "resample with sw != sh");
ieLayer.getParameters()["factor"] = 1.0 / scaleWidth;
}
else if (interpolation == "bilinear")
{
ieLayer.setType("Interp");
ieLayer.getParameters()["pad_beg"] = 0;
ieLayer.getParameters()["pad_end"] = 0;
ieLayer.getParameters()["align_corners"] = false;
}
else
CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation);
ieLayer.getParameters()["width"] = outWidth;
ieLayer.getParameters()["height"] = outHeight;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.precision = InferenceEngine::Precision::FP32;
@@ -187,6 +214,7 @@ public:
ieLayer->params["width"] = cv::format("%d", outWidth);
ieLayer->params["height"] = cv::format("%d", outHeight);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
@@ -247,6 +275,18 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer ieLayer(name);
ieLayer.setName(name);
ieLayer.setType("Interp");
ieLayer.getParameters()["pad_beg"] = 0;
ieLayer.getParameters()["pad_end"] = 0;
ieLayer.getParameters()["width"] = outWidth;
ieLayer.getParameters()["height"] = outHeight;
ieLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
ieLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Interp";
@@ -256,6 +296,7 @@ public:
ieLayer->params["pad_beg"] = "0";
ieLayer->params["pad_end"] = "0";
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@@ -197,6 +197,29 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::ScaleShiftLayer ieLayer(name);
CV_Assert(!blobs.empty());
const size_t numChannels = blobs[0].total();
if (hasWeights)
{
ieLayer.setWeights(wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C));
}
else
{
auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
{numChannels});
weights->allocate();
std::vector<float> ones(numChannels, 1);
weights->set(ones);
ieLayer.setWeights(weights);
}
if (hasBias)
ieLayer.setBiases(wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
@@ -223,6 +246,7 @@ public:
ieLayer->_biases = wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@@ -110,8 +110,15 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE && sliceRanges.size() == 1 && sliceRanges[0].size() == 4);
#ifdef HAVE_INF_ENGINE
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
{
return INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5) &&
sliceRanges.size() == 1 && sliceRanges[0].size() == 4;
}
else
#endif
return backendId == DNN_BACKEND_OPENCV;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -254,9 +261,10 @@ public:
}
}
#ifdef HAVE_INF_ENGINE
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::LayerParams lp;
lp.name = name;
@@ -286,10 +294,11 @@ public:
ieLayer->dim.push_back(sliceRanges[0][i].end - sliceRanges[0][i].start);
}
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
#else
return Ptr<BackendNode>();
#endif // IE < R5
}
#endif
};
Ptr<SliceLayer> SliceLayer::create(const LayerParams& params)

@@ -326,6 +326,13 @@ public:
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::Builder::SoftMaxLayer ieLayer(name);
ieLayer.setAxis(clamp(axisRaw, input->dims.size()));
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#else
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::LayerParams lp;
@@ -335,6 +342,7 @@ public:
std::shared_ptr<InferenceEngine::SoftMaxLayer> ieLayer(new InferenceEngine::SoftMaxLayer(lp));
ieLayer->axis = clamp(axisRaw, input->dims.size());
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

@@ -18,6 +18,10 @@ namespace cv { namespace dnn {
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::Builder::Layer& _layer)
: BackendNode(DNN_BACKEND_INFERENCE_ENGINE), layer(_layer) {}
#else
InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::CNNLayerPtr& _layer)
: BackendNode(DNN_BACKEND_INFERENCE_ENGINE), layer(_layer) {}
@@ -40,6 +44,7 @@ void InfEngineBackendNode::connect(std::vector<Ptr<BackendWrapper> >& inputs,
layer->outData[0] = dataPtr;
dataPtr->creatorLayer = InferenceEngine::CNNLayerWeakPtr(layer);
}
#endif
static std::vector<Ptr<InfEngineBackendWrapper> >
infEngineWrappers(const std::vector<Ptr<BackendWrapper> >& ptrs)
@@ -54,6 +59,129 @@ infEngineWrappers(const std::vector<Ptr<BackendWrapper> >& ptrs)
return wrappers;
}
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InfEngineBackendNet::InfEngineBackendNet() : netBuilder("")
{
hasNetOwner = false;
targetDevice = InferenceEngine::TargetDevice::eCPU;
}
InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net) : netBuilder(""), cnn(net)
{
hasNetOwner = true;
targetDevice = InferenceEngine::TargetDevice::eCPU;
}
void InfEngineBackendNet::connect(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendWrapper> >& outputs,
const std::string& layerName)
{
std::vector<Ptr<InfEngineBackendWrapper> > inpWrappers = infEngineWrappers(inputs);
std::map<std::string, int>::iterator it = layers.find(layerName);
CV_Assert(it != layers.end());
const int layerId = it->second;
for (int i = 0; i < inpWrappers.size(); ++i)
{
const auto& inp = inpWrappers[i];
const std::string& inpName = inp->dataPtr->name;
int inpId;
it = layers.find(inpName);
if (it == layers.end())
{
InferenceEngine::Builder::InputLayer inpLayer(inpName);
std::vector<size_t> shape(inp->blob->dims());
std::reverse(shape.begin(), shape.end());
inpLayer.setPort(InferenceEngine::Port(shape));
inpId = netBuilder.addLayer(inpLayer);
layers.insert({inpName, inpId});
}
else
inpId = it->second;
netBuilder.connect(inpId, {layerId, i});
unconnectedLayersIds.erase(inpId);
}
CV_Assert(!outputs.empty());
InferenceEngine::DataPtr dataPtr = infEngineDataNode(outputs[0]);
dataPtr->name = layerName;
}
void InfEngineBackendNet::init(int targetId)
{
if (!hasNetOwner)
{
CV_Assert(!unconnectedLayersIds.empty());
for (int id : unconnectedLayersIds)
{
InferenceEngine::Builder::OutputLayer outLayer("myconv1");
netBuilder.addLayer({id}, outLayer);
}
cnn = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(netBuilder.build()));
}
switch (targetId)
{
case DNN_TARGET_CPU:
targetDevice = InferenceEngine::TargetDevice::eCPU;
break;
case DNN_TARGET_OPENCL: case DNN_TARGET_OPENCL_FP16:
targetDevice = InferenceEngine::TargetDevice::eGPU;
break;
case DNN_TARGET_MYRIAD:
targetDevice = InferenceEngine::TargetDevice::eMYRIAD;
break;
case DNN_TARGET_FPGA:
targetDevice = InferenceEngine::TargetDevice::eFPGA;
break;
default:
CV_Error(Error::StsError, format("Unknown target identifier: %d", targetId));
}
for (const auto& name : requestedOutputs)
{
cnn.addOutput(name);
}
for (const auto& it : cnn.getInputsInfo())
{
const std::string& name = it.first;
auto blobIt = allBlobs.find(name);
CV_Assert(blobIt != allBlobs.end());
inpBlobs[name] = blobIt->second;
it.second->setPrecision(blobIt->second->precision());
}
for (const auto& it : cnn.getOutputsInfo())
{
const std::string& name = it.first;
auto blobIt = allBlobs.find(name);
CV_Assert(blobIt != allBlobs.end());
outBlobs[name] = blobIt->second;
it.second->setPrecision(blobIt->second->precision()); // Should always be FP32
}
initPlugin(cnn);
}
void InfEngineBackendNet::addLayer(const InferenceEngine::Builder::Layer& layer)
{
int id = netBuilder.addLayer(layer);
const std::string& layerName = layer.getName();
CV_Assert(layers.insert({layerName, id}).second);
unconnectedLayersIds.insert(id);
}
void InfEngineBackendNet::addOutput(const std::string& name)
{
requestedOutputs.push_back(name);
}
#endif // IE >= R5
static InferenceEngine::Layout estimateLayout(const Mat& m)
{
if (m.dims == 4)
@@ -148,6 +276,7 @@ void InfEngineBackendWrapper::setHostDirty()
}
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
InfEngineBackendNet::InfEngineBackendNet()
{
targetDevice = InferenceEngine::TargetDevice::eCPU;
@@ -491,6 +620,8 @@ void InfEngineBackendNet::init(int targetId)
initPlugin(*this);
}
#endif // IE < R5
static std::map<InferenceEngine::TargetDevice, InferenceEngine::InferenceEnginePluginPtr> sharedPlugins;
void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
@@ -566,7 +697,11 @@ void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs
auto wrappers = infEngineWrappers(ptrs);
for (const auto& wrapper : wrappers)
{
allBlobs.insert({wrapper->dataPtr->name, wrapper->blob});
std::string name = wrapper->dataPtr->name;
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
name = name.empty() ? "id1" : name; // TODO: drop the magic input name.
#endif
allBlobs.insert({name, wrapper->blob});
}
}

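For orientation, the builder-based InfEngineBackendNet above assembles the graph incrementally: addLayer registers a layer and marks it unconnected, connect wires its inputs (creating InputLayer nodes on demand and erasing producers from unconnectedLayersIds), and init closes every still-unconnected layer with an OutputLayer before converting the builder into a CNNNetwork and loading the plugin. A rough usage sketch under those assumptions (the layer name and stride are hypothetical):

InfEngineBackendNet net;                                  // empty Builder::Network
InferenceEngine::Builder::ReorgYoloLayer reorg("reorg");  // any Builder layer works here
reorg.setStride(2);
net.addLayer(reorg);                      // registered, still in unconnectedLayersIds
// net.connect(inputs, outputs, "reorg"); // wires input ports, names the output data
// net.addBlobs(wrappers);                // registers input/output blobs by name
// net.init(DNN_TARGET_CPU);              // adds OutputLayers, builds CNNNetwork, loads plugin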
@@ -35,6 +35,11 @@
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_GE(ver) (((INF_ENGINE_RELEASE) / 10000) >= ((ver) / 10000))
#define INF_ENGINE_VER_MAJOR_LT(ver) (((INF_ENGINE_RELEASE) / 10000) < ((ver) / 10000))
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
#include <ie_builders.hpp>
#endif
#endif // HAVE_INF_ENGINE
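These helpers compare only the release part of INF_ENGINE_RELEASE, which is encoded as YYYYRRPPPP: integer division by 10000 strips the four patch digits, so 2018R5 (2018050000) becomes 201805 and any 2018R5 patch build compares equal to it. A standalone check of that arithmetic (illustration only, not part of the header):

static_assert(2018050000 / 10000 == 201805, "patch digits are stripped");
static_assert((2018050400 / 10000) == (2018050000 / 10000), "patch builds of 2018R5 compare equal");
static_assert((2018050000 / 10000) > (2018040000 / 10000), "2018R5 is newer than 2018R4");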
@@ -42,6 +47,7 @@ namespace cv { namespace dnn {
#ifdef HAVE_INF_ENGINE
#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2018R5)
class InfEngineBackendNet : public InferenceEngine::ICNNNetwork
{
public:
@@ -146,17 +152,75 @@ private:
void initPlugin(InferenceEngine::ICNNNetwork& net);
};
#else // IE < R5
class InfEngineBackendNet
{
public:
InfEngineBackendNet();
InfEngineBackendNet(InferenceEngine::CNNNetwork& net);
void addLayer(const InferenceEngine::Builder::Layer& layer);
void addOutput(const std::string& name);
void connect(const std::vector<Ptr<BackendWrapper> >& inputs,
const std::vector<Ptr<BackendWrapper> >& outputs,
const std::string& layerName);
bool isInitialized();
void init(int targetId);
void forward();
void initPlugin(InferenceEngine::ICNNNetwork& net);
void addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs);
private:
InferenceEngine::Builder::Network netBuilder;
InferenceEngine::InferenceEnginePluginPtr enginePtr;
InferenceEngine::InferencePlugin plugin;
InferenceEngine::ExecutableNetwork netExec;
InferenceEngine::InferRequest infRequest;
InferenceEngine::BlobMap allBlobs;
InferenceEngine::BlobMap inpBlobs;
InferenceEngine::BlobMap outBlobs;
InferenceEngine::TargetDevice targetDevice;
InferenceEngine::CNNNetwork cnn;
bool hasNetOwner;
std::map<std::string, int> layers;
std::vector<std::string> requestedOutputs;
std::set<int> unconnectedLayersIds;
};
#endif // IE < R5
class InfEngineBackendNode : public BackendNode
{
public:
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InfEngineBackendNode(const InferenceEngine::Builder::Layer& layer);
#else
InfEngineBackendNode(const InferenceEngine::CNNLayerPtr& layer);
#endif
void connect(std::vector<Ptr<BackendWrapper> >& inputs,
std::vector<Ptr<BackendWrapper> >& outputs);
InferenceEngine::CNNLayerPtr layer;
// Inference Engine network object that allows obtaining the outputs of this layer.
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2018R5)
InferenceEngine::Builder::Layer layer;
Ptr<InfEngineBackendNet> net;
#else
InferenceEngine::CNNLayerPtr layer;
Ptr<InfEngineBackendNet> net;
#endif
};
class InfEngineBackendWrapper : public BackendWrapper

@@ -1,10 +1,2 @@
#include "../precomp.hpp"
#if defined(TH_DISABLE_HEAP_TRACKING)
#elif (defined(__unix) || defined(_WIN32))
#include <malloc.h>
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#endif
#include "THGeneral.h"

@@ -180,7 +180,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false);
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.013 : 0.0;
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.013 : 2e-5;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0;
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
inp, "detection_out", "", l1, lInf, 0.25);
@@ -288,7 +288,7 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false);
// Output image has values in range [-143.526, 148.539].
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.3 : 4e-5;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7.0 : 2e-3;
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7.28 : 2e-3;
processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf);
}

@@ -306,7 +306,7 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc)
// batch size 1
testDarknetModel(config_file, weights_file, ref.rowRange(0, 2), scoreDiff, iouDiff);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018040000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_MYRIAD)
#endif
// batch size 2

@@ -163,7 +163,7 @@ TEST_P(Deconvolution, Accuracy)
bool hasBias = get<6>(GetParam());
Backend backendId = get<0>(get<7>(GetParam()));
Target targetId = get<1>(get<7>(GetParam()));
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && (targetId == DNN_TARGET_CPU || targetId == DNN_TARGET_MYRIAD) &&
dilation.width == 2 && dilation.height == 2)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2018040000
@@ -466,6 +466,7 @@ void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId)
pool.set("stride_w", 2);
pool.set("stride_h", 2);
pool.type = "Pooling";
pool.name = "ave_pool";
Net net;
int poolId = net.addLayer(pool.name, pool.type, pool);

@@ -295,10 +295,6 @@ TEST_P(Test_Caffe_layers, Eltwise)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
#endif
testLayerUsingCaffeModels("layer_eltwise");
}

@@ -351,6 +351,10 @@ TEST_P(Test_ONNX_nets, LResNet100E_IR)
l1 = 0.009;
lInf = 0.035;
}
else if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_CPU) {
l1 = 4.5e-5;
lInf = 1.9e-4;
}
testONNXModels("LResNet100E_IR", pb, l1, lInf);
}
@@ -366,6 +370,10 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus)
l1 = 0.021;
lInf = 0.034;
}
else if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_CPU || target == DNN_TARGET_OPENCL)) {
l1 = 2.4e-4;
lInf = 6e-4;
}
testONNXModels("emotion_ferplus", pb, l1, lInf);
}
@@ -389,7 +397,7 @@ TEST_P(Test_ONNX_nets, Inception_v1)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
#endif
testONNXModels("inception_v1", pb);
}

@@ -40,7 +40,7 @@ TEST(Test_TensorFlow, read_inception)
ASSERT_TRUE(!sample.empty());
Mat input;
resize(sample, input, Size(224, 224));
input -= 128; // mean sub
input -= Scalar::all(117); // mean sub
Mat inputBlob = blobFromImage(input);
@@ -351,8 +351,8 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD)
Mat out = net.forward();
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco_2017_11_17.detection_out.npy"));
float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1e-5;
float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0098 : 1e-3;
float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 7e-3 : 1.5e-5;
float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.012 : 1e-3;
normAssertDetections(ref, out, "", 0.3, scoreDiff, iouDiff);
}
@@ -366,6 +366,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
double scoresDiff = backend == DNN_BACKEND_INFERENCE_ENGINE ? 2.9e-5 : 1e-5;
for (int i = 0; i < 2; ++i)
{
std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
@@ -381,7 +382,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
Mat out = net.forward();
Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/" + names[i] + ".detection_out.npy"));
normAssertDetections(ref, out, names[i].c_str(), 0.3);
normAssertDetections(ref, out, names[i].c_str(), 0.3, scoresDiff);
}
}
@@ -406,7 +407,7 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN)
net.setInput(blob);
Mat out = net.forward();
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : default_l1;
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 1.1e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.021 : default_lInf;
normAssertDetections(ref, out, "", 0.4, scoreDiff, iouDiff);
}
@@ -568,10 +569,6 @@ TEST_P(Test_TensorFlow_layers, slice)
if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
(target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("");
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#endif
runTensorFlowNet("slice_4d");
}

@@ -260,6 +260,11 @@ TEST_P(Test_Torch_layers, run_paralel)
TEST_P(Test_Torch_layers, net_residual)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && (target == DNN_TARGET_OPENCL ||
target == DNN_TARGET_OPENCL_FP16))
throw SkipTestException("Test is disabled for OpenVINO 2018R5");
#endif
runTorchNet("net_residual", "", false, true);
}
@@ -390,10 +395,6 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
// -model models/instance_norm/feathers.t7
TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE == 2018050000
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
#endif
checkBackend();
std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
"dnn/fast_neural_style_instance_norm_feathers.t7"};

@@ -197,8 +197,7 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
centers.clear();
std::vector < std::vector<Point> > contours;
Mat tmpBinaryImage = binaryImage.clone();
findContours(tmpBinaryImage, contours, RETR_LIST, CHAIN_APPROX_NONE);
findContours(binaryImage, contours, RETR_LIST, CHAIN_APPROX_NONE);
#ifdef DEBUG_BLOB_DETECTOR
// Mat keypointsImage;
@@ -214,7 +213,7 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
{
Center center;
center.confidence = 1;
Moments moms = moments(Mat(contours[contourIdx]));
Moments moms = moments(contours[contourIdx]);
if (params.filterByArea)
{
double area = moms.m00;
@@ -225,7 +224,7 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
if (params.filterByCircularity)
{
double area = moms.m00;
double perimeter = arcLength(Mat(contours[contourIdx]), true);
double perimeter = arcLength(contours[contourIdx], true);
double ratio = 4 * CV_PI * area / (perimeter * perimeter);
if (ratio < params.minCircularity || ratio >= params.maxCircularity)
continue;
@@ -261,9 +260,9 @@ void SimpleBlobDetectorImpl::findBlobs(InputArray _image, InputArray _binaryImag
if (params.filterByConvexity)
{
std::vector < Point > hull;
convexHull(Mat(contours[contourIdx]), hull);
double area = contourArea(Mat(contours[contourIdx]));
double hullArea = contourArea(Mat(hull));
convexHull(contours[contourIdx], hull);
double area = contourArea(contours[contourIdx]);
double hullArea = contourArea(hull);
if (fabs(hullArea) < DBL_EPSILON)
continue;
double ratio = area / hullArea;

@@ -84,4 +84,26 @@ PERF_TEST_P(TestFindContoursFF, findContours,
SANITY_CHECK_NOTHING();
}
typedef TestBaseWithParam< tuple<MatDepth, int> > TestBoundingRect;
PERF_TEST_P(TestBoundingRect, BoundingRect,
Combine(
testing::Values(CV_32S, CV_32F), // points type
Values(400, 511, 1000, 10000, 100000) // points count
)
)
{
int ptType = get<0>(GetParam());
int n = get<1>(GetParam());
Mat pts(n, 2, ptType);
declare.in(pts, WARMUP_RNG);
cv::Rect rect;
TEST_CYCLE() rect = boundingRect(pts);
SANITY_CHECK_NOTHING();
}
} } // namespace

@@ -11,7 +11,7 @@ typedef perf::TestBaseWithParam<Size_MatType_OutMatDepth_t> Size_MatType_OutMatD
PERF_TEST_P(Size_MatType_OutMatDepth, integral,
testing::Combine(
testing::Values(TYPICAL_MAT_SIZES),
testing::Values(CV_8UC1, CV_8UC4),
testing::Values(CV_8UC1, CV_8UC3, CV_8UC4),
testing::Values(CV_32S, CV_32F, CV_64F)
)
)

@@ -213,7 +213,7 @@ int FilterEngine::start(const Size &_wholeSize, const Size &sz, const Point &ofs
}
// adjust bufstep so that the used part of the ring buffer stays compact in memory
bufStep = bufElemSize*(int)alignSize(roi.width + (!isSeparable() ? ksize.width - 1 : 0),16);
bufStep = bufElemSize*(int)alignSize(roi.width + (!isSeparable() ? ksize.width - 1 : 0),VEC_ALIGN);
dx1 = std::max(anchor.x - roi.x, 0);
dx2 = std::max(ksize.width - anchor.x - 1 + roi.x + roi.width - wholeSize.width, 0);

@@ -11,16 +11,6 @@
#include "opencv2/core/softfloat.hpp"
#ifndef CV_ALWAYS_INLINE
#if defined(__GNUC__) && (__GNUC__ > 3 ||(__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define CV_ALWAYS_INLINE __forceinline
#else
#define CV_ALWAYS_INLINE inline
#endif
#endif
namespace
{

@@ -45,6 +45,7 @@
#include "opencl_kernels_imgproc.hpp"
#include <iostream>
#include "hal_replacement.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include <opencv2/core/utils/configuration.private.hpp>
/****************************************************************************************\
@@ -97,73 +98,65 @@ struct MorphNoVec
int operator()(uchar**, int, uchar*, int) const { return 0; }
};
#if CV_SSE2
#if CV_SIMD
template<class VecUpdate> struct MorphRowIVec
template<class VecUpdate> struct MorphRowVec
{
enum { ESZ = VecUpdate::ESZ };
MorphRowIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
typedef typename VecUpdate::vtype vtype;
typedef typename vtype::lane_type stype;
MorphRowVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar* src, uchar* dst, int width, int cn) const
{
if( !checkHardwareSupport(CV_CPU_SSE2) )
return 0;
cn *= ESZ;
int i, k, _ksize = ksize*cn;
width = (width & -4)*cn;
width *= cn;
VecUpdate updateOp;
for( i = 0; i <= width - 16; i += 16 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes )
{
__m128i s = _mm_loadu_si128((const __m128i*)(src + i));
vtype s0 = vx_load((const stype*)src + i);
vtype s1 = vx_load((const stype*)src + i + vtype::nlanes);
vtype s2 = vx_load((const stype*)src + i + 2*vtype::nlanes);
vtype s3 = vx_load((const stype*)src + i + 3*vtype::nlanes);
for (k = cn; k < _ksize; k += cn)
{
__m128i x = _mm_loadu_si128((const __m128i*)(src + i + k));
s = updateOp(s, x);
s0 = updateOp(s0, vx_load((const stype*)src + i + k));
s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes));
s2 = updateOp(s2, vx_load((const stype*)src + i + k + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load((const stype*)src + i + k + 3*vtype::nlanes));
}
_mm_storeu_si128((__m128i*)(dst + i), s);
v_store((stype*)dst + i, s0);
v_store((stype*)dst + i + vtype::nlanes, s1);
v_store((stype*)dst + i + 2*vtype::nlanes, s2);
v_store((stype*)dst + i + 3*vtype::nlanes, s3);
}
for( ; i < width; i += 4 )
if( i <= width - 2*vtype::nlanes )
{
__m128i s = _mm_cvtsi32_si128(*(const int*)(src + i));
vtype s0 = vx_load((const stype*)src + i);
vtype s1 = vx_load((const stype*)src + i + vtype::nlanes);
for( k = cn; k < _ksize; k += cn )
{
__m128i x = _mm_cvtsi32_si128(*(const int*)(src + i + k));
s = updateOp(s, x);
s0 = updateOp(s0, vx_load((const stype*)src + i + k));
s1 = updateOp(s1, vx_load((const stype*)src + i + k + vtype::nlanes));
}
*(int*)(dst + i) = _mm_cvtsi128_si32(s);
v_store((stype*)dst + i, s0);
v_store((stype*)dst + i + vtype::nlanes, s1);
i += 2*vtype::nlanes;
}
return i/ESZ;
}
int ksize, anchor;
};
template<class VecUpdate> struct MorphRowFVec
{
MorphRowFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar* src, uchar* dst, int width, int cn) const
if( i <= width - vtype::nlanes )
{
if( !checkHardwareSupport(CV_CPU_SSE) )
return 0;
int i, k, _ksize = ksize*cn;
width = (width & -4)*cn;
VecUpdate updateOp;
for( i = 0; i < width; i += 4 )
{
__m128 s = _mm_loadu_ps((const float*)src + i);
vtype s = vx_load((const stype*)src + i);
for( k = cn; k < _ksize; k += cn )
{
__m128 x = _mm_loadu_ps((const float*)src + i + k);
s = updateOp(s, x);
s = updateOp(s, vx_load((const stype*)src + i + k));
v_store((stype*)dst + i, s);
i += vtype::nlanes;
}
_mm_storeu_ps((float*)dst + i, s);
if( i <= width - vtype::nlanes/2 )
{
vtype s = vx_load_low((const stype*)src + i);
for( k = cn; k < _ksize; k += cn )
s = updateOp(s, vx_load_low((const stype*)src + i + k));
v_store_low((stype*)dst + i, s);
i += vtype::nlanes/2;
}
return i;
@@ -173,419 +166,269 @@ template<class VecUpdate> struct MorphRowFVec
};
template<class VecUpdate> struct MorphColumnIVec
template<class VecUpdate> struct MorphColumnVec
{
enum { ESZ = VecUpdate::ESZ };
MorphColumnIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar** src, uchar* dst, int dststep, int count, int width) const
typedef typename VecUpdate::vtype vtype;
typedef typename vtype::lane_type stype;
MorphColumnVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar** _src, uchar* _dst, int dststep, int count, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE2) )
return 0;
int i = 0, k, _ksize = ksize;
width *= ESZ;
VecUpdate updateOp;
for( i = 0; i < count + ksize - 1; i++ )
CV_Assert( ((size_t)src[i] & 15) == 0 );
CV_Assert( ((size_t)_src[i] & (CV_SIMD_WIDTH-1)) == 0 );
const stype** src = (const stype**)_src;
stype* dst = (stype*)_dst;
dststep /= sizeof(dst[0]);
for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
{
for( i = 0; i <= width - 32; i += 32 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes)
{
const uchar* sptr = src[1] + i;
__m128i s0 = _mm_load_si128((const __m128i*)sptr);
__m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
__m128i x0, x1;
const stype* sptr = src[1] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);
vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes);
vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes);
for( k = 2; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes));
}
sptr = src[0] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
_mm_storeu_si128((__m128i*)(dst + i), updateOp(s0, x0));
_mm_storeu_si128((__m128i*)(dst + i + 16), updateOp(s1, x1));
v_store(dst + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));
v_store(dst + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)));
v_store(dst + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)));
sptr = src[k] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
_mm_storeu_si128((__m128i*)(dst + dststep + i), updateOp(s0, x0));
_mm_storeu_si128((__m128i*)(dst + dststep + i + 16), updateOp(s1, x1));
v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));
v_store(dst + dststep + i + 2*vtype::nlanes, updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes)));
v_store(dst + dststep + i + 3*vtype::nlanes, updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes)));
}
for( ; i <= width - 8; i += 8 )
if( i <= width - 2*vtype::nlanes )
{
__m128i s0 = _mm_loadl_epi64((const __m128i*)(src[1] + i)), x0;
const stype* sptr = src[1] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);
for( k = 2; k < _ksize; k++ )
{
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
s0 = updateOp(s0, x0);
}
x0 = _mm_loadl_epi64((const __m128i*)(src[0] + i));
_mm_storel_epi64((__m128i*)(dst + i), updateOp(s0, x0));
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
_mm_storel_epi64((__m128i*)(dst + dststep + i), updateOp(s0, x0));
}
}
for( ; count > 0; count--, dst += dststep, src++ )
{
for( i = 0; i <= width - 32; i += 32 )
{
const uchar* sptr = src[0] + i;
__m128i s0 = _mm_load_si128((const __m128i*)sptr);
__m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
__m128i x0, x1;
for( k = 1; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_si128((const __m128i*)sptr);
x1 = _mm_load_si128((const __m128i*)(sptr + 16));
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
}
_mm_storeu_si128((__m128i*)(dst + i), s0);
_mm_storeu_si128((__m128i*)(dst + i + 16), s1);
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
}
for( ; i <= width - 8; i += 8 )
{
__m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;
for( k = 1; k < _ksize; k++ )
{
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
s0 = updateOp(s0, x0);
}
_mm_storel_epi64((__m128i*)(dst + i), s0);
}
}
sptr = src[0] + i;
v_store(dst + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));
return i/ESZ;
sptr = src[k] + i;
v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(sptr)));
v_store(dst + dststep + i + vtype::nlanes, updateOp(s1, vx_load_aligned(sptr + vtype::nlanes)));
i += 2*vtype::nlanes;
}
int ksize, anchor;
};
template<class VecUpdate> struct MorphColumnFVec
if( i <= width - vtype::nlanes )
{
MorphColumnFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
int operator()(const uchar** _src, uchar* _dst, int dststep, int count, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE) )
return 0;
int i = 0, k, _ksize = ksize;
VecUpdate updateOp;
for( i = 0; i < count + ksize - 1; i++ )
CV_Assert( ((size_t)_src[i] & 15) == 0 );
const float** src = (const float**)_src;
float* dst = (float*)_dst;
dststep /= sizeof(dst[0]);
for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
{
for( i = 0; i <= width - 16; i += 16 )
{
const float* sptr = src[1] + i;
__m128 s0 = _mm_load_ps(sptr);
__m128 s1 = _mm_load_ps(sptr + 4);
__m128 s2 = _mm_load_ps(sptr + 8);
__m128 s3 = _mm_load_ps(sptr + 12);
__m128 x0, x1, x2, x3;
vtype s0 = vx_load_aligned(src[1] + i);
for( k = 2; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
s2 = updateOp(s2, x2);
s3 = updateOp(s3, x3);
}
s0 = updateOp(s0, vx_load_aligned(src[k] + i));
sptr = src[0] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
_mm_storeu_ps(dst + i, updateOp(s0, x0));
_mm_storeu_ps(dst + i + 4, updateOp(s1, x1));
_mm_storeu_ps(dst + i + 8, updateOp(s2, x2));
_mm_storeu_ps(dst + i + 12, updateOp(s3, x3));
sptr = src[k] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
_mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
_mm_storeu_ps(dst + dststep + i + 4, updateOp(s1, x1));
_mm_storeu_ps(dst + dststep + i + 8, updateOp(s2, x2));
_mm_storeu_ps(dst + dststep + i + 12, updateOp(s3, x3));
v_store(dst + i, updateOp(s0, vx_load_aligned(src[0] + i)));
v_store(dst + dststep + i, updateOp(s0, vx_load_aligned(src[k] + i)));
i += vtype::nlanes;
}
for( ; i <= width - 4; i += 4 )
if( i <= width - vtype::nlanes/2 )
{
__m128 s0 = _mm_load_ps(src[1] + i), x0;
vtype s0 = vx_load_low(src[1] + i);
for( k = 2; k < _ksize; k++ )
{
x0 = _mm_load_ps(src[k] + i);
s0 = updateOp(s0, x0);
}
s0 = updateOp(s0, vx_load_low(src[k] + i));
x0 = _mm_load_ps(src[0] + i);
_mm_storeu_ps(dst + i, updateOp(s0, x0));
x0 = _mm_load_ps(src[k] + i);
_mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
v_store_low(dst + i, updateOp(s0, vx_load_low(src[0] + i)));
v_store_low(dst + dststep + i, updateOp(s0, vx_load_low(src[k] + i)));
i += vtype::nlanes/2;
}
}
for( ; count > 0; count--, dst += dststep, src++ )
{
for( i = 0; i <= width - 16; i += 16 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes)
{
const float* sptr = src[0] + i;
__m128 s0 = _mm_load_ps(sptr);
__m128 s1 = _mm_load_ps(sptr + 4);
__m128 s2 = _mm_load_ps(sptr + 8);
__m128 s3 = _mm_load_ps(sptr + 12);
__m128 x0, x1, x2, x3;
const stype* sptr = src[0] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);
vtype s2 = vx_load_aligned(sptr + 2*vtype::nlanes);
vtype s3 = vx_load_aligned(sptr + 3*vtype::nlanes);
for( k = 1; k < _ksize; k++ )
{
sptr = src[k] + i;
x0 = _mm_load_ps(sptr);
x1 = _mm_load_ps(sptr + 4);
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
x2 = _mm_load_ps(sptr + 8);
x3 = _mm_load_ps(sptr + 12);
s2 = updateOp(s2, x2);
s3 = updateOp(s3, x3);
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
s2 = updateOp(s2, vx_load_aligned(sptr + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load_aligned(sptr + 3*vtype::nlanes));
}
_mm_storeu_ps(dst + i, s0);
_mm_storeu_ps(dst + i + 4, s1);
_mm_storeu_ps(dst + i + 8, s2);
_mm_storeu_ps(dst + i + 12, s3);
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
v_store(dst + i + 2*vtype::nlanes, s2);
v_store(dst + i + 3*vtype::nlanes, s3);
}
for( i = 0; i <= width - 4; i += 4 )
if( i <= width - 2*vtype::nlanes )
{
__m128 s0 = _mm_load_ps(src[0] + i), x0;
const stype* sptr = src[0] + i;
vtype s0 = vx_load_aligned(sptr);
vtype s1 = vx_load_aligned(sptr + vtype::nlanes);
for( k = 1; k < _ksize; k++ )
{
x0 = _mm_load_ps(src[k] + i);
s0 = updateOp(s0, x0);
}
_mm_storeu_ps(dst + i, s0);
}
sptr = src[k] + i;
s0 = updateOp(s0, vx_load_aligned(sptr));
s1 = updateOp(s1, vx_load_aligned(sptr + vtype::nlanes));
}
return i;
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
i += 2*vtype::nlanes;
}
int ksize, anchor;
};
template<class VecUpdate> struct MorphIVec
{
enum { ESZ = VecUpdate::ESZ };
int operator()(uchar** src, int nz, uchar* dst, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE2) )
return 0;
int i, k;
width *= ESZ;
VecUpdate updateOp;
for( i = 0; i <= width - 32; i += 32 )
if( i <= width - vtype::nlanes )
{
const uchar* sptr = src[0] + i;
__m128i s0 = _mm_loadu_si128((const __m128i*)sptr);
__m128i s1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
__m128i x0, x1;
vtype s0 = vx_load_aligned(src[0] + i);
for( k = 1; k < nz; k++ )
{
sptr = src[k] + i;
x0 = _mm_loadu_si128((const __m128i*)sptr);
x1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
}
_mm_storeu_si128((__m128i*)(dst + i), s0);
_mm_storeu_si128((__m128i*)(dst + i + 16), s1);
for( k = 1; k < _ksize; k++ )
s0 = updateOp(s0, vx_load_aligned(src[k] + i));
v_store(dst + i, s0);
i += vtype::nlanes;
}
for( ; i <= width - 8; i += 8 )
if( i <= width - vtype::nlanes/2 )
{
__m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;
vtype s0 = vx_load_low(src[0] + i);
for( k = 1; k < nz; k++ )
{
x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
s0 = updateOp(s0, x0);
for( k = 1; k < _ksize; k++ )
s0 = updateOp(s0, vx_load_low(src[k] + i));
v_store_low(dst + i, s0);
i += vtype::nlanes/2;
}
_mm_storel_epi64((__m128i*)(dst + i), s0);
}
return i/ESZ;
return i;
}
int ksize, anchor;
};
template<class VecUpdate> struct MorphFVec
template<class VecUpdate> struct MorphVec
{
typedef typename VecUpdate::vtype vtype;
typedef typename vtype::lane_type stype;
int operator()(uchar** _src, int nz, uchar* _dst, int width) const
{
if( !checkHardwareSupport(CV_CPU_SSE) )
return 0;
const float** src = (const float**)_src;
float* dst = (float*)_dst;
const stype** src = (const stype**)_src;
stype* dst = (stype*)_dst;
int i, k;
VecUpdate updateOp;
for( i = 0; i <= width - 16; i += 16 )
for( i = 0; i <= width - 4*vtype::nlanes; i += 4*vtype::nlanes )
{
const float* sptr = src[0] + i;
__m128 s0 = _mm_loadu_ps(sptr);
__m128 s1 = _mm_loadu_ps(sptr + 4);
__m128 s2 = _mm_loadu_ps(sptr + 8);
__m128 s3 = _mm_loadu_ps(sptr + 12);
__m128 x0, x1, x2, x3;
const stype* sptr = src[0] + i;
vtype s0 = vx_load(sptr);
vtype s1 = vx_load(sptr + vtype::nlanes);
vtype s2 = vx_load(sptr + 2*vtype::nlanes);
vtype s3 = vx_load(sptr + 3*vtype::nlanes);
for( k = 1; k < nz; k++ )
{
sptr = src[k] + i;
x0 = _mm_loadu_ps(sptr);
x1 = _mm_loadu_ps(sptr + 4);
x2 = _mm_loadu_ps(sptr + 8);
x3 = _mm_loadu_ps(sptr + 12);
s0 = updateOp(s0, x0);
s1 = updateOp(s1, x1);
s2 = updateOp(s2, x2);
s3 = updateOp(s3, x3);
s0 = updateOp(s0, vx_load(sptr));
s1 = updateOp(s1, vx_load(sptr + vtype::nlanes));
s2 = updateOp(s2, vx_load(sptr + 2*vtype::nlanes));
s3 = updateOp(s3, vx_load(sptr + 3*vtype::nlanes));
}
_mm_storeu_ps(dst + i, s0);
_mm_storeu_ps(dst + i + 4, s1);
_mm_storeu_ps(dst + i + 8, s2);
_mm_storeu_ps(dst + i + 12, s3);
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
v_store(dst + i + 2*vtype::nlanes, s2);
v_store(dst + i + 3*vtype::nlanes, s3);
}
for( ; i <= width - 4; i += 4 )
if( i <= width - 2*vtype::nlanes )
{
__m128 s0 = _mm_loadu_ps(src[0] + i), x0;
const stype* sptr = src[0] + i;
vtype s0 = vx_load(sptr);
vtype s1 = vx_load(sptr + vtype::nlanes);
for( k = 1; k < nz; k++ )
{
x0 = _mm_loadu_ps(src[k] + i);
s0 = updateOp(s0, x0);
sptr = src[k] + i;
s0 = updateOp(s0, vx_load(sptr));
s1 = updateOp(s1, vx_load(sptr + vtype::nlanes));
}
_mm_storeu_ps(dst + i, s0);
v_store(dst + i, s0);
v_store(dst + i + vtype::nlanes, s1);
i += 2*vtype::nlanes;
}
for( ; i < width; i++ )
if( i <= width - vtype::nlanes )
{
__m128 s0 = _mm_load_ss(src[0] + i), x0;
vtype s0 = vx_load(src[0] + i);
for( k = 1; k < nz; k++ )
{
x0 = _mm_load_ss(src[k] + i);
s0 = updateOp(s0, x0);
s0 = updateOp(s0, vx_load(src[k] + i));
v_store(dst + i, s0);
i += vtype::nlanes;
}
_mm_store_ss(dst + i, s0);
if( i <= width - vtype::nlanes/2 )
{
vtype s0 = vx_load_low(src[0] + i);
for( k = 1; k < nz; k++ )
s0 = updateOp(s0, vx_load_low(src[k] + i));
v_store_low(dst + i, s0);
i += vtype::nlanes/2;
}
return i;
}
};
struct VMin8u
{
enum { ESZ = 1 };
__m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_min_epu8(a,b); }
};
struct VMax8u
{
enum { ESZ = 1 };
__m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_max_epu8(a,b); }
};
struct VMin16u
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_subs_epu16(a,_mm_subs_epu16(a,b)); }
};
struct VMax16u
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_adds_epu16(_mm_subs_epu16(a,b), b); }
};
struct VMin16s
template <typename T> struct VMin
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_min_epi16(a, b); }
typedef T vtype;
vtype operator()(const vtype& a, const vtype& b) const { return v_min(a,b); }
};
struct VMax16s
template <typename T> struct VMax
{
enum { ESZ = 2 };
__m128i operator()(const __m128i& a, const __m128i& b) const
{ return _mm_max_epi16(a, b); }
typedef T vtype;
vtype operator()(const vtype& a, const vtype& b) const { return v_max(a,b); }
};
struct VMin32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_min_ps(a,b); }};
struct VMax32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_max_ps(a,b); }};
typedef MorphRowIVec<VMin8u> ErodeRowVec8u;
typedef MorphRowIVec<VMax8u> DilateRowVec8u;
typedef MorphRowIVec<VMin16u> ErodeRowVec16u;
typedef MorphRowIVec<VMax16u> DilateRowVec16u;
typedef MorphRowIVec<VMin16s> ErodeRowVec16s;
typedef MorphRowIVec<VMax16s> DilateRowVec16s;
typedef MorphRowFVec<VMin32f> ErodeRowVec32f;
typedef MorphRowFVec<VMax32f> DilateRowVec32f;
typedef MorphColumnIVec<VMin8u> ErodeColumnVec8u;
typedef MorphColumnIVec<VMax8u> DilateColumnVec8u;
typedef MorphColumnIVec<VMin16u> ErodeColumnVec16u;
typedef MorphColumnIVec<VMax16u> DilateColumnVec16u;
typedef MorphColumnIVec<VMin16s> ErodeColumnVec16s;
typedef MorphColumnIVec<VMax16s> DilateColumnVec16s;
typedef MorphColumnFVec<VMin32f> ErodeColumnVec32f;
typedef MorphColumnFVec<VMax32f> DilateColumnVec32f;
typedef MorphIVec<VMin8u> ErodeVec8u;
typedef MorphIVec<VMax8u> DilateVec8u;
typedef MorphIVec<VMin16u> ErodeVec16u;
typedef MorphIVec<VMax16u> DilateVec16u;
typedef MorphIVec<VMin16s> ErodeVec16s;
typedef MorphIVec<VMax16s> DilateVec16s;
typedef MorphFVec<VMin32f> ErodeVec32f;
typedef MorphFVec<VMax32f> DilateVec32f;
typedef MorphRowVec<VMin<v_uint8> > ErodeRowVec8u;
typedef MorphRowVec<VMax<v_uint8> > DilateRowVec8u;
typedef MorphRowVec<VMin<v_uint16> > ErodeRowVec16u;
typedef MorphRowVec<VMax<v_uint16> > DilateRowVec16u;
typedef MorphRowVec<VMin<v_int16> > ErodeRowVec16s;
typedef MorphRowVec<VMax<v_int16> > DilateRowVec16s;
typedef MorphRowVec<VMin<v_float32> > ErodeRowVec32f;
typedef MorphRowVec<VMax<v_float32> > DilateRowVec32f;
typedef MorphColumnVec<VMin<v_uint8> > ErodeColumnVec8u;
typedef MorphColumnVec<VMax<v_uint8> > DilateColumnVec8u;
typedef MorphColumnVec<VMin<v_uint16> > ErodeColumnVec16u;
typedef MorphColumnVec<VMax<v_uint16> > DilateColumnVec16u;
typedef MorphColumnVec<VMin<v_int16> > ErodeColumnVec16s;
typedef MorphColumnVec<VMax<v_int16> > DilateColumnVec16s;
typedef MorphColumnVec<VMin<v_float32> > ErodeColumnVec32f;
typedef MorphColumnVec<VMax<v_float32> > DilateColumnVec32f;
typedef MorphVec<VMin<v_uint8> > ErodeVec8u;
typedef MorphVec<VMax<v_uint8> > DilateVec8u;
typedef MorphVec<VMin<v_uint16> > ErodeVec16u;
typedef MorphVec<VMax<v_uint16> > DilateVec16u;
typedef MorphVec<VMin<v_int16> > ErodeVec16s;
typedef MorphVec<VMax<v_int16> > DilateVec16s;
typedef MorphVec<VMin<v_float32> > ErodeVec32f;
typedef MorphVec<VMax<v_float32> > DilateVec32f;
#else

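The morphology rewrite above drops the separate SSE2 integer and SSE float kernels in favor of OpenCV's universal intrinsics, so a single template covers 8-bit, 16-bit, and float data at whatever SIMD width the build selects (SSE2, AVX2, NEON, ...). A minimal single-channel sketch of the row-filter idea, simplified from MorphRowVec, which additionally unrolls by four vectors and handles channel strides and tails:

#include "opencv2/core/hal/intrin.hpp"

// Erode one row of 8-bit pixels with a horizontal kernel of size ksize:
// erosion is a running minimum over the kernel window (dilation uses v_max).
static int erodeRowSketch(const uchar* src, uchar* dst, int width, int ksize)
{
    int i = 0;
    for (; i <= width - cv::v_uint8::nlanes; i += cv::v_uint8::nlanes)
    {
        cv::v_uint8 s = cv::vx_load(src + i);
        for (int k = 1; k < ksize; ++k)
            s = cv::v_min(s, cv::vx_load(src + i + k));
        cv::v_store(dst + i, s);
    }
    return i;  // scalar code finishes the remaining pixels
}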
@@ -39,6 +39,8 @@
//
//M*/
#include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp"
namespace cv
{
@@ -746,53 +748,105 @@ static Rect pointSetBoundingRect( const Mat& points )
if( npoints == 0 )
return Rect();
const Point* pts = points.ptr<Point>();
Point pt = pts[0];
#if CV_SIMD
const int64_t* pts = points.ptr<int64_t>();
#if CV_SSE4_2
if(cv::checkHardwareSupport(CV_CPU_SSE4_2))
{
if( !is_float )
{
__m128i minval, maxval;
minval = maxval = _mm_loadl_epi64((const __m128i*)(&pt)); //min[0]=pt.x, min[1]=pt.y
for( i = 1; i < npoints; i++ )
v_int32 minval, maxval;
minval = maxval = v_reinterpret_as_s32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
for( i = 1; i <= npoints - v_int32::nlanes/2; i+= v_int32::nlanes/2 )
{
__m128i ptXY = _mm_loadl_epi64((const __m128i*)&pts[i]);
minval = _mm_min_epi32(ptXY, minval);
maxval = _mm_max_epi32(ptXY, maxval);
v_int32 ptXY2 = v_reinterpret_as_s32(vx_load(pts + i));
minval = v_min(ptXY2, minval);
maxval = v_max(ptXY2, maxval);
}
xmin = _mm_cvtsi128_si32(minval);
ymin = _mm_cvtsi128_si32(_mm_srli_si128(minval, 4));
xmax = _mm_cvtsi128_si32(maxval);
ymax = _mm_cvtsi128_si32(_mm_srli_si128(maxval, 4));
minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
if( i <= npoints - v_int32::nlanes/4 )
{
v_int32 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
minval = v_min(ptXY, minval);
maxval = v_max(ptXY, maxval);
i += v_int64::nlanes/2;
}
else
for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
{
__m128 minvalf, maxvalf, z = _mm_setzero_ps(), ptXY = _mm_setzero_ps();
minvalf = maxvalf = _mm_loadl_pi(z, (const __m64*)(&pt));
for( i = 1; i < npoints; i++ )
minval = v_min(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))));
maxval = v_max(v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))));
}
xmin = minval.get0();
xmax = maxval.get0();
ymin = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval))).get0();
ymax = v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval))).get0();
#if CV_SIMD_WIDTH > 16
if( i < npoints )
{
ptXY = _mm_loadl_pi(ptXY, (const __m64*)&pts[i]);
minvalf = _mm_min_ps(minvalf, ptXY);
maxvalf = _mm_max_ps(maxvalf, ptXY);
v_int32x4 minval2, maxval2;
minval2 = maxval2 = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
for( i++; i < npoints; i++ )
{
v_int32x4 ptXY = v_reinterpret_as_s32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
minval2 = v_min(ptXY, minval2);
maxval2 = v_max(ptXY, maxval2);
}
float xyminf[2], xymaxf[2];
_mm_storel_pi((__m64*)xyminf, minvalf);
_mm_storel_pi((__m64*)xymaxf, maxvalf);
xmin = cvFloor(xyminf[0]);
ymin = cvFloor(xyminf[1]);
xmax = cvFloor(xymaxf[0]);
ymax = cvFloor(xymaxf[1]);
xmin = min(xmin, minval2.get0());
xmax = max(xmax, maxval2.get0());
ymin = min(ymin, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(minval2))).get0());
ymax = max(ymax, v_reinterpret_as_s32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0());
}
#endif
}
else
#endif
{
v_float32 minval, maxval;
minval = maxval = v_reinterpret_as_f32(vx_setall_s64(*pts)); //min[0]=pt.x, min[1]=pt.y, min[2]=pt.x, min[3]=pt.y
for( i = 1; i <= npoints - v_float32::nlanes/2; i+= v_float32::nlanes/2 )
{
v_float32 ptXY2 = v_reinterpret_as_f32(vx_load(pts + i));
minval = v_min(ptXY2, minval);
maxval = v_max(ptXY2, maxval);
}
minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
if( i <= npoints - v_float32::nlanes/4 )
{
v_float32 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(vx_load_low(pts + i))));
minval = v_min(ptXY, minval);
maxval = v_max(ptXY, maxval);
i += v_float32::nlanes/4;
}
for(int j = 16; j < CV_SIMD_WIDTH; j*=2)
{
minval = v_min(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(minval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))));
maxval = v_max(v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(maxval))), v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))));
}
xmin = cvFloor(minval.get0());
xmax = cvFloor(maxval.get0());
ymin = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval))).get0());
ymax = cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval))).get0());
#if CV_SIMD_WIDTH > 16
if( i < npoints )
{
v_float32x4 minval2, maxval2;
minval2 = maxval2 = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
for( i++; i < npoints; i++ )
{
v_float32x4 ptXY = v_reinterpret_as_f32(v_expand_low(v_reinterpret_as_u32(v_load_low(pts + i))));
minval2 = v_min(ptXY, minval2);
maxval2 = v_max(ptXY, maxval2);
}
xmin = min(xmin, cvFloor(minval2.get0()));
xmax = max(xmax, cvFloor(maxval2.get0()));
ymin = min(ymin, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(minval2))).get0()));
ymax = max(ymax, cvFloor(v_reinterpret_as_f32(v_expand_high(v_reinterpret_as_u32(maxval2))).get0()));
}
#endif
}
#else
const Point* pts = points.ptr<Point>();
Point pt = pts[0];
if( !is_float )
{
xmin = xmax = pt.x;
@@ -848,7 +902,7 @@ static Rect pointSetBoundingRect( const Mat& points )
v.i = CV_TOGGLE_FLT(xmax); xmax = cvFloor(v.f);
v.i = CV_TOGGLE_FLT(ymax); ymax = cvFloor(v.f);
}
}
#endif
return Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
}

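The vectorized pointSetBoundingRect above leans on the memory layout of Point: loading the array as 64-bit lanes packs interleaved x,y pairs into the register, per-lane v_min/v_max then track the x and y extrema independently, and the v_expand-based folds at the end reduce the register to a single pair. A simplified fixed-width illustration of that reduction for integer points (a sketch assuming npoints is even; pts is the const Point* array):

cv::v_int32x4 mins = cv::v_load((const int*)pts);          // lanes: x0 y0 x1 y1
for (int i = 2; i + 1 < npoints; i += 2)
    mins = cv::v_min(mins, cv::v_load((const int*)(pts + i)));
int buf[4];
cv::v_store(buf, mins);
int xmin = std::min(buf[0], buf[2]);                       // fold the two x lanes
int ymin = std::min(buf[1], buf[3]);                       // fold the two y lanes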
@@ -0,0 +1,262 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#include "precomp.hpp"
#include "sumpixels.hpp"
namespace cv {
namespace { // Anonymous namespace to avoid exposing the implementation classes
//
// NOTE: Look at the bottom of the file for the entry-point function for external callers
//
// At the moment only the 3-channel, untilted case is supported
// More channel support is coming soon.
// TODO: Add support for sqsum and 1,2, and 4 channels
class IntegralCalculator_3Channel {
public:
IntegralCalculator_3Channel() {};
void calculate_integral_avx512(const uchar *src, size_t _srcstep,
double *sum, size_t _sumstep,
double *sqsum, size_t _sqsumstep,
int width, int height, int cn)
{
const int srcstep = (int)(_srcstep/sizeof(uchar));
const int sumstep = (int)(_sumstep/sizeof(double));
const int sqsumstep = (int)(_sqsumstep/sizeof(double));
const int ops_per_line = width * cn;
// Clear the first line of the sum as per spec (see integral documentation)
// Also adjust the index of sum and sqsum to be at the real 0th element
// and not point to the border pixel so it stays in sync with the src pointer
memset( sum, 0, (ops_per_line+cn)*sizeof(double));
sum += cn;
if (sqsum) {
memset( sqsum, 0, (ops_per_line+cn)*sizeof(double));
sqsum += cn;
}
// Now calculate the integral over the whole image one line at a time
for(int y = 0; y < height; y++) {
const uchar * src_line = &src[y*srcstep];
double * sum_above = &sum[y*sumstep];
double * sum_line = &sum_above[sumstep];
double * sqsum_above = (sqsum) ? &sqsum[y*sqsumstep] : NULL;
double * sqsum_line = (sqsum) ? &sqsum_above[sqsumstep] : NULL;
integral_line_3channel_avx512(src_line, sum_line, sum_above, sqsum_line, sqsum_above, ops_per_line);
}
}
static inline
void integral_line_3channel_avx512(const uchar *srcs,
double *sums, double *sums_above,
double *sqsums, double *sqsums_above,
int num_ops_in_line)
{
__m512i sum_accumulator = _mm512_setzero_si512(); // holds rolling sums for the line
__m512i sqsum_accumulator = _mm512_setzero_si512(); // holds rolling sqsums for the line
// The first element on each line must be zeroes as per spec (see integral documentation)
set_border_pixel_value(sums, sqsums);
// Do all 64 byte chunk operations then do the last bits that don't fit in a 64 byte chunk
aligned_integral( srcs, sums, sums_above, sqsums, sqsums_above, sum_accumulator, sqsum_accumulator, num_ops_in_line);
post_aligned_integral(srcs, sums, sums_above, sqsums, sqsums_above, sum_accumulator, sqsum_accumulator, num_ops_in_line);
}
static inline
void set_border_pixel_value(double *sums, double *sqsums)
{
// Sets the border pixel value to 0s.
// Note: the hard-coded -3 and the 0x7 mask are because we only support 3 channels right now
__m512i zeroes = _mm512_setzero_si512();
_mm512_mask_storeu_epi64(&sums[-3], 0x7, zeroes);
if (sqsums)
_mm512_mask_storeu_epi64(&sqsums[-3], 0x7, zeroes);
}
static inline
void aligned_integral(const uchar *&srcs,
double *&sums, double *&sums_above,
double *&sqsum, double *&sqsum_above,
__m512i &sum_accumulator, __m512i &sqsum_accumulator,
int num_ops_in_line)
{
// This function handles full 64 byte chunks of the source data at a time until it gets to the part of
// the line that no longer contains a full 64 byte chunk. Other code will handle the last part.
const int num_chunks = num_ops_in_line >> 6; // quick int divide by 64
for (int index_64byte_chunk = 0; index_64byte_chunk < num_chunks; index_64byte_chunk++){
integral_64_operations_avx512((__m512i *) srcs,
(__m512i *) sums, (__m512i *) sums_above,
(__m512i *) sqsum, (__m512i *) sqsum_above,
0xFFFFFFFFFFFFFFFF, sum_accumulator, sqsum_accumulator);
srcs+=64; sums+=64; sums_above+=64;
if (sqsum){ sqsum+= 64; sqsum_above+=64; }
}
}
static inline
void post_aligned_integral(const uchar *srcs,
const double *sums, const double *sums_above,
const double *sqsum, const double *sqsum_above,
__m512i &sum_accumulator, __m512i &sqsum_accumulator,
int num_ops_in_line)
{
// This function handles the last few straggling operations that are not a full chunk of 64 operations
// We use the same algorithm, but we calculate a different operation mask using (num_ops % 64).
const unsigned int num_operations = (unsigned int) num_ops_in_line & 0x3F; // Quick int modulo 64
if (num_operations > 0) {
__mmask64 operation_mask = (1ULL << num_operations) - 1ULL;
integral_64_operations_avx512((__m512i *) srcs, (__m512i *) sums, (__m512i *) sums_above,
(__m512i *) sqsum, (__m512i *) sqsum_above,
operation_mask, sum_accumulator, sqsum_accumulator);
}
}
static inline
void integral_64_operations_avx512(const __m512i *srcs,
__m512i *sums, const __m512i *sums_above,
__m512i *sqsums, const __m512i *sqsums_above,
__mmask64 data_mask,
__m512i &sum_accumulator, __m512i &sqsum_accumulator)
{
__m512i src_64byte_chunk = read_64_bytes(srcs, data_mask);
for(int num_16byte_chunks=0; num_16byte_chunks<4; num_16byte_chunks++) {
__m128i src_16bytes = _mm512_extracti64x2_epi64(src_64byte_chunk, 0x0); // Get lower 16 bytes of data
for (int num_8byte_chunks = 0; num_8byte_chunks < 2; num_8byte_chunks++) {
__m512i src_longs = convert_lower_8bytes_to_longs(src_16bytes);
// Calculate integral for the sum on the 8 entries
integral_8_operations(src_longs, sums_above, data_mask, sums, sum_accumulator);
sums++; sums_above++;
if (sqsums){ // Calculate integral for the squared sum on the 8 entries
__m512i squared_source = _mm512_mullo_epi64(src_longs, src_longs);
integral_8_operations(squared_source, sqsums_above, data_mask, sqsums, sqsum_accumulator);
sqsums++; sqsums_above++;
}
// Prepare for next iteration of inner loop
// shift source to align next 8 bytes to lane 0 and shift the mask
src_16bytes = shift_right_8_bytes(src_16bytes);
data_mask = data_mask >> 8;
}
// Prepare for next iteration of outer loop
src_64byte_chunk = shift_right_16_bytes(src_64byte_chunk);
}
}
static inline
void integral_8_operations(const __m512i src_longs, const __m512i *above_values_ptr, __mmask64 data_mask,
__m512i *results_ptr, __m512i &accumulator)
{
_mm512_mask_storeu_pd(
results_ptr, // Store the result here
data_mask, // Using the data mask to avoid overrunning the line
calculate_integral( // Writing the value of the integral derived from:
src_longs, // input data
_mm512_maskz_loadu_pd(data_mask, above_values_ptr), // and the results from line above
accumulator // keeping track of the accumulator
)
);
}
static inline
__m512d calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator)
{
__m512i carryover_idxs = _mm512_set_epi64(6, 5, 7, 6, 5, 7, 6, 5);
// Align data to prepare for the adds:
// shifts data left by 3 and 6 qwords(lanes) and gets rolling sum in all lanes
// Vertical LANES: 76543210
// src_longs : HGFEDCBA
// shifted3lanes : + EDCBA
// shifted6lanes : + BA
// carry_over_idxs : + 65765765 (index position of result from previous iteration)
// = integral
__m512i shifted3lanes = _mm512_maskz_expand_epi64(0xF8, src_longs);
__m512i shifted6lanes = _mm512_maskz_expand_epi64(0xC0, src_longs);
__m512i carry_over = _mm512_permutex2var_epi64(accumulator, carryover_idxs, accumulator);
// Do the adds in tree form (shift3 + shift 6) + (current_source_values + accumulator)
__m512i sum_shift3and6 = _mm512_add_epi64(shifted3lanes, shifted6lanes);
__m512i sum_src_carry = _mm512_add_epi64(src_longs, carry_over);
accumulator = _mm512_add_epi64(sum_shift3and6, sum_src_carry);
// Convert to packed double and add to the line above to get the true integral value
__m512d accumulator_pd = _mm512_cvtepu64_pd(accumulator);
__m512d integral_pd = _mm512_add_pd(accumulator_pd, above_values);
return integral_pd;
}
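// A hedged scalar model (not part of the patch) of what the shift-3/shift-6 tree
// adds compute for 3-channel data: each lane ends up holding the running sum of
// all earlier same-channel values, and carry_over forwards the last complete
// per-channel totals into the next 8-lane block. The real code also adds the
// line above and converts to double.
//
//     #include <cstdint>
//     #include <cstdio>
//
//     int main() {
//         // 16 interleaved 3-channel samples; channel of element i is i % 3
//         uint8_t  src[16] = {1,2,3, 4,5,6, 7,8,9, 10,11,12, 13,14,15, 16};
//         uint64_t running[3] = {0, 0, 0};   // per-channel accumulator (the "carry")
//         uint64_t rowsum[16];
//         for (int i = 0; i < 16; i++) {
//             running[i % 3] += src[i];      // same-channel rolling sum
//             rowsum[i] = running[i % 3];
//         }
//         printf("%llu\n", (unsigned long long)rowsum[15]);  // 1+4+7+10+13+16 = 51
//         return 0;
//     }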
static inline
__m512i read_64_bytes(const __m512i *srcs, __mmask64 data_mask) {
return _mm512_maskz_loadu_epi8(data_mask, srcs);
}
static inline
__m512i convert_lower_8bytes_to_longs(__m128i src_16bytes) {
return _mm512_cvtepu8_epi64(src_16bytes);
}
static inline
__m128i shift_right_8_bytes(__m128i src_16bytes) {
return _mm_maskz_compress_epi64(2, src_16bytes);
}
static inline
__m512i shift_right_16_bytes(__m512i src_64byte_chunk) {
return _mm512_maskz_compress_epi64(0xFC, src_64byte_chunk);
}
};
} // end of anonymous namespace
namespace opt_AVX512_SKX {
// This is the entry point for the external caller interface.
// It should be the only function in this file that is called from outside.
// Any new implementations should be dispatched from here.
void calculate_integral_avx512(const uchar *src, size_t _srcstep,
double *sum, size_t _sumstep,
double *sqsum, size_t _sqsumstep,
int width, int height, int cn)
{
IntegralCalculator_3Channel calculator;
calculator.calculate_integral_avx512(src, _srcstep, sum, _sumstep, sqsum, _sqsumstep, width, height, cn);
}
} // end namespace opt_AVX512_SKX
} // end namespace cv

@ -10,7 +10,7 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2000-2008,2019 Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
@ -44,6 +44,7 @@
#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "sumpixels.hpp"
namespace cv
@ -62,6 +63,37 @@ struct Integral_SIMD
}
};
template <>
struct Integral_SIMD<uchar, double, double> {
Integral_SIMD() {};
bool operator()(const uchar *src, size_t _srcstep,
double *sum, size_t _sumstep,
double *sqsum, size_t _sqsumstep,
double *tilted, size_t _tiltedstep,
int width, int height, int cn) const
{
#if CV_TRY_AVX512_SKX
CV_UNUSED(_tiltedstep);
// TODO: Add support for 1, 2, and 4 channels
if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && cn == 3){
opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep,
sqsum, _sqsumstep, width, height, cn);
return true;
}
#else
// Avoid warnings in some builds
CV_UNUSED(src); CV_UNUSED(_srcstep); CV_UNUSED(sum); CV_UNUSED(_sumstep);
CV_UNUSED(sqsum); CV_UNUSED(_sqsumstep); CV_UNUSED(tilted); CV_UNUSED(_tiltedstep);
CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(cn);
#endif
return false;
}
};
#if CV_SIMD && CV_SIMD_WIDTH <= 64
template <>

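// For reference, a hedged scalar sketch (not part of the patch, helper name
// hypothetical) of what the specialization above ultimately computes per channel,
// ignoring the extra zero row/column that cv::integral prepends: each output
// element is the row-local per-channel running sum plus the element directly above.
//
//     #include <cstdint>
//     #include <vector>
//
//     static void integral_ref(const uint8_t* src, int width, int height, int cn,
//                              std::vector<double>& sum, std::vector<double>& sqsum)
//     {
//         const int stride = width * cn;
//         sum.assign((size_t)height * stride, 0.0);
//         sqsum.assign((size_t)height * stride, 0.0);
//         for (int y = 0; y < height; y++) {
//             std::vector<double> rowsum(cn, 0.0), rowsq(cn, 0.0);
//             for (int x = 0; x < stride; x++) {
//                 const double v = src[y * stride + x];
//                 rowsum[x % cn] += v;       // rolling per-channel sum of this row
//                 rowsq[x % cn]  += v * v;
//                 const double above    = y ? sum[(y - 1) * stride + x]   : 0.0;
//                 const double above_sq = y ? sqsum[(y - 1) * stride + x] : 0.0;
//                 sum[y * stride + x]   = rowsum[x % cn] + above;
//                 sqsum[y * stride + x] = rowsq[x % cn]  + above_sq;
//             }
//         }
//     }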
@ -0,0 +1,25 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2019, Intel Corporation, all rights reserved.
#ifndef OPENCV_IMGPROC_SUM_PIXELS_HPP
#define OPENCV_IMGPROC_SUM_PIXELS_HPP
namespace cv
{
namespace opt_AVX512_SKX
{
#if CV_TRY_AVX512_SKX
void calculate_integral_avx512(
const uchar *src, size_t _srcstep,
double *sum, size_t _sumstep,
double *sqsum, size_t _sqsumstep,
int width, int height, int cn);
#endif
} // end namespace opt_AVX512_SKX
} // end namespace cv
#endif

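// A hedged usage sketch of the public entry point that can reach this header's
// function: a CV_8UC3 source with CV_64F sum and squared sum is the only
// combination the dispatch above forwards to AVX-512; other channel counts
// still take the generic path.
//
//     #include <opencv2/imgproc.hpp>
//
//     int main() {
//         cv::Mat img(480, 640, CV_8UC3, cv::Scalar(1, 2, 3));
//         cv::Mat sum, sqsum;
//         // sum/sqsum are (H+1)x(W+1) CV_64FC3; no tilted sum requested
//         cv::integral(img, sum, sqsum, CV_64F, CV_64F);
//         return 0;
//     }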
@ -190,22 +190,19 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
int j = 0;
const uchar* src = _src.ptr();
uchar* dst = _dst.ptr();
#if CV_SIMD128
bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
if( useSIMD )
{
v_uint8x16 thresh_u = v_setall_u8( thresh );
v_uint8x16 maxval16 = v_setall_u8( maxval );
#if CV_SIMD
v_uint8 thresh_u = vx_setall_u8( thresh );
v_uint8 maxval16 = vx_setall_u8( maxval );
switch( type )
{
case THRESH_BINARY:
for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
for( j = 0; j <= roi.width - 16; j += 16 )
for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
v_uint8x16 v0;
v0 = v_load( src + j );
v_uint8 v0;
v0 = vx_load( src + j );
v0 = thresh_u < v0;
v0 = v0 & maxval16;
v_store( dst + j, v0 );
@ -216,10 +213,10 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
case THRESH_BINARY_INV:
for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
for( j = 0; j <= roi.width - 16; j += 16 )
for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
v_uint8x16 v0;
v0 = v_load( src + j );
v_uint8 v0;
v0 = vx_load( src + j );
v0 = v0 <= thresh_u;
v0 = v0 & maxval16;
v_store( dst + j, v0 );
@ -230,10 +227,10 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
case THRESH_TRUNC:
for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
for( j = 0; j <= roi.width - 16; j += 16 )
for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
v_uint8x16 v0;
v0 = v_load( src + j );
v_uint8 v0;
v0 = vx_load( src + j );
v0 = v0 - ( v0 - thresh_u );
v_store( dst + j, v0 );
}
@ -243,10 +240,10 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
case THRESH_TOZERO:
for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
for( j = 0; j <= roi.width - 16; j += 16 )
for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
v_uint8x16 v0;
v0 = v_load( src + j );
v_uint8 v0;
v0 = vx_load( src + j );
v0 = ( thresh_u < v0 ) & v0;
v_store( dst + j, v0 );
}
@ -256,17 +253,16 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
case THRESH_TOZERO_INV:
for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
for( j = 0; j <= roi.width - 16; j += 16 )
for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
v_uint8x16 v0;
v0 = v_load( src + j );
v_uint8 v0;
v0 = vx_load( src + j );
v0 = ( v0 <= thresh_u ) & v0;
v_store( dst + j, v0 );
}
}
break;
}
}
#endif
int j_scalar = j;
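// The pattern introduced above is width-agnostic: a hedged sketch (helper name
// hypothetical) of a single THRESH_BINARY row using the same universal
// intrinsics, sized by v_uint8::nlanes (16, 32, or 64 lanes depending on the
// build target), with a scalar tail.
//
//     #include <opencv2/core/hal/intrin.hpp>
//     using namespace cv;
//
//     static void thresh_binary_row(const uchar* src, uchar* dst, int width,
//                                   uchar thresh, uchar maxval)
//     {
//         int j = 0;
//     #if CV_SIMD
//         v_uint8 vthresh = vx_setall_u8(thresh);
//         v_uint8 vmax    = vx_setall_u8(maxval);
//         for (; j <= width - v_uint8::nlanes; j += v_uint8::nlanes)
//         {
//             v_uint8 v0 = vx_load(src + j);
//             v0 = vthresh < v0;    // per-lane mask: all-ones where src > thresh
//             v0 = v0 & vmax;       // mask & maxval -> maxval or 0
//             v_store(dst + j, v0);
//         }
//     #endif
//         for (; j < width; j++)    // scalar tail for the last few pixels
//             dst[j] = src[j] > thresh ? (uchar)maxval : 0;
//     }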
@ -355,30 +351,35 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
const ushort* src = _src.ptr<ushort>();
ushort* dst = _dst.ptr<ushort>();
#if CV_SIMD128
bool useSIMD = checkHardwareSupport(CV_CPU_SSE2) || checkHardwareSupport(CV_CPU_NEON);
if (useSIMD)
{
#if CV_SIMD
int i, j;
v_uint16x8 thresh_u = v_setall_u16(thresh);
v_uint16x8 maxval16 = v_setall_u16(maxval);
v_uint16 thresh_u = vx_setall_u16(thresh);
v_uint16 maxval16 = vx_setall_u16(maxval);
switch (type)
{
case THRESH_BINARY:
for (i = 0; i < roi.height; i++, src += src_step, dst += dst_step)
{
for (j = 0; j <= roi.width - 16; j += 16)
for (j = 0; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes)
{
v_uint16x8 v0, v1;
v0 = v_load(src + j);
v1 = v_load(src + j + 8);
v_uint16 v0, v1;
v0 = vx_load(src + j);
v1 = vx_load(src + j + v_uint16::nlanes);
v0 = thresh_u < v0;
v1 = thresh_u < v1;
v0 = v0 & maxval16;
v1 = v1 & maxval16;
v_store(dst + j, v0);
v_store(dst + j + 8, v1);
v_store(dst + j + v_uint16::nlanes, v1);
}
if (j <= roi.width - v_uint16::nlanes)
{
v_uint16 v0 = vx_load(src + j);
v0 = thresh_u < v0;
v0 = v0 & maxval16;
v_store(dst + j, v0);
j += v_uint16::nlanes;
}
for (; j < roi.width; j++)
@ -390,17 +391,25 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
for (i = 0; i < roi.height; i++, src += src_step, dst += dst_step)
{
j = 0;
for (; j <= roi.width - 16; j += 16)
for (; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes)
{
v_uint16x8 v0, v1;
v0 = v_load(src + j);
v1 = v_load(src + j + 8);
v_uint16 v0, v1;
v0 = vx_load(src + j);
v1 = vx_load(src + j + v_uint16::nlanes);
v0 = v0 <= thresh_u;
v1 = v1 <= thresh_u;
v0 = v0 & maxval16;
v1 = v1 & maxval16;
v_store(dst + j, v0);
v_store(dst + j + 8, v1);
v_store(dst + j + v_uint16::nlanes, v1);
}
if (j <= roi.width - v_uint16::nlanes)
{
v_uint16 v0 = vx_load(src + j);
v0 = v0 <= thresh_u;
v0 = v0 & maxval16;
v_store(dst + j, v0);
j += v_uint16::nlanes;
}
for (; j < roi.width; j++)
@ -412,15 +421,22 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
for (i = 0; i < roi.height; i++, src += src_step, dst += dst_step)
{
j = 0;
for (; j <= roi.width - 16; j += 16)
for (; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes)
{
v_uint16x8 v0, v1;
v0 = v_load(src + j);
v1 = v_load(src + j + 8);
v_uint16 v0, v1;
v0 = vx_load(src + j);
v1 = vx_load(src + j + v_uint16::nlanes);
v0 = v_min(v0, thresh_u);
v1 = v_min(v1, thresh_u);
v_store(dst + j, v0);
v_store(dst + j + 8, v1);
v_store(dst + j + v_uint16::nlanes, v1);
}
if (j <= roi.width - v_uint16::nlanes)
{
v_uint16 v0 = vx_load(src + j);
v0 = v_min(v0, thresh_u);
v_store(dst + j, v0);
j += v_uint16::nlanes;
}
for (; j < roi.width; j++)
@ -432,15 +448,22 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
for (i = 0; i < roi.height; i++, src += src_step, dst += dst_step)
{
j = 0;
for (; j <= roi.width - 16; j += 16)
for (; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes)
{
v_uint16x8 v0, v1;
v0 = v_load(src + j);
v1 = v_load(src + j + 8);
v_uint16 v0, v1;
v0 = vx_load(src + j);
v1 = vx_load(src + j + v_uint16::nlanes);
v0 = (thresh_u < v0) & v0;
v1 = (thresh_u < v1) & v1;
v_store(dst + j, v0);
v_store(dst + j + 8, v1);
v_store(dst + j + v_uint16::nlanes, v1);
}
if (j <= roi.width - v_uint16::nlanes)
{
v_uint16 v0 = vx_load(src + j);
v0 = (thresh_u < v0) & v0;
v_store(dst + j, v0);
j += v_uint16::nlanes;
}
for (; j < roi.width; j++)
@ -452,15 +475,22 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
for (i = 0; i < roi.height; i++, src += src_step, dst += dst_step)
{
j = 0;
for (; j <= roi.width - 16; j += 16)
for (; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes)
{
v_uint16x8 v0, v1;
v0 = v_load(src + j);
v1 = v_load(src + j + 8);
v_uint16 v0, v1;
v0 = vx_load(src + j);
v1 = vx_load(src + j + v_uint16::nlanes);
v0 = (v0 <= thresh_u) & v0;
v1 = (v1 <= thresh_u) & v1;
v_store(dst + j, v0);
v_store(dst + j + 8, v1);
v_store(dst + j + v_uint16::nlanes, v1);
}
if (j <= roi.width - v_uint16::nlanes)
{
v_uint16 v0 = vx_load(src + j);
v0 = (v0 <= thresh_u) & v0;
v_store(dst + j, v0);
j += v_uint16::nlanes;
}
for (; j < roi.width; j++)
@ -468,12 +498,9 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
}
break;
}
}
else
#endif
{
#else
threshGeneric<ushort>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
}
#endif
}
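// The wider element types above add a three-tier tail: an unrolled main loop over
// two vectors, at most one extra full vector, then scalars. A hedged THRESH_TRUNC
// sketch of that structure for 16-bit data (helper name hypothetical):
//
//     #include <algorithm>
//     #include <opencv2/core/hal/intrin.hpp>
//     using namespace cv;
//
//     static void trunc_row_16u(const ushort* src, ushort* dst, int width, ushort thresh)
//     {
//         int j = 0;
//     #if CV_SIMD
//         v_uint16 vthresh = vx_setall_u16(thresh);
//         for (; j <= width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes)
//         {   // tier 1: two vectors per iteration
//             v_store(dst + j, v_min(vx_load(src + j), vthresh));
//             v_store(dst + j + v_uint16::nlanes,
//                     v_min(vx_load(src + j + v_uint16::nlanes), vthresh));
//         }
//         if (j <= width - v_uint16::nlanes)
//         {   // tier 2: at most one remaining full vector
//             v_store(dst + j, v_min(vx_load(src + j), vthresh));
//             j += v_uint16::nlanes;
//         }
//     #endif
//         for (; j < width; j++)   // tier 3: scalar remainder
//             dst[j] = std::min(src[j], thresh);
//     }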
static void
@ -544,13 +571,10 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
}
#endif
#if CV_SIMD128
bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
if( useSIMD )
{
#if CV_SIMD
int i, j;
v_int16x8 thresh8 = v_setall_s16( thresh );
v_int16x8 maxval8 = v_setall_s16( maxval );
v_int16 thresh8 = vx_setall_s16( thresh );
v_int16 maxval8 = vx_setall_s16( maxval );
switch( type )
{
@ -558,17 +582,25 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 16; j += 16 )
for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
{
v_int16x8 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 8 );
v_int16 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_int16::nlanes );
v0 = thresh8 < v0;
v1 = thresh8 < v1;
v0 = v0 & maxval8;
v1 = v1 & maxval8;
v_store( dst + j, v0 );
v_store( dst + j + 8, v1 );
v_store( dst + j + v_int16::nlanes, v1 );
}
if( j <= roi.width - v_int16::nlanes )
{
v_int16 v0 = vx_load( src + j );
v0 = thresh8 < v0;
v0 = v0 & maxval8;
v_store( dst + j, v0 );
j += v_int16::nlanes;
}
for( ; j < roi.width; j++ )
@ -580,17 +612,25 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 16; j += 16 )
for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
{
v_int16x8 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 8 );
v_int16 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_int16::nlanes );
v0 = v0 <= thresh8;
v1 = v1 <= thresh8;
v0 = v0 & maxval8;
v1 = v1 & maxval8;
v_store( dst + j, v0 );
v_store( dst + j + 8, v1 );
v_store( dst + j + v_int16::nlanes, v1 );
}
if( j <= roi.width - v_int16::nlanes )
{
v_int16 v0 = vx_load( src + j );
v0 = v0 <= thresh8;
v0 = v0 & maxval8;
v_store( dst + j, v0 );
j += v_int16::nlanes;
}
for( ; j < roi.width; j++ )
@ -602,15 +642,22 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 16; j += 16 )
for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
{
v_int16x8 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 8 );
v_int16 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_int16::nlanes );
v0 = v_min( v0, thresh8 );
v1 = v_min( v1, thresh8 );
v_store( dst + j, v0 );
v_store( dst + j + 8, v1 );
v_store( dst + j + v_int16::nlanes, v1 );
}
if( j <= roi.width - v_int16::nlanes )
{
v_int16 v0 = vx_load( src + j );
v0 = v_min( v0, thresh8 );
v_store( dst + j, v0 );
j += v_int16::nlanes;
}
for( ; j < roi.width; j++ )
@ -622,15 +669,22 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 16; j += 16 )
for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
{
v_int16x8 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 8 );
v_int16 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_int16::nlanes );
v0 = ( thresh8 < v0 ) & v0;
v1 = ( thresh8 < v1 ) & v1;
v_store( dst + j, v0 );
v_store( dst + j + 8, v1 );
v_store( dst + j + v_int16::nlanes, v1 );
}
if( j <= roi.width - v_int16::nlanes )
{
v_int16 v0 = vx_load( src + j );
v0 = ( thresh8 < v0 ) & v0;
v_store( dst + j, v0 );
j += v_int16::nlanes;
}
for( ; j < roi.width; j++ )
@ -642,15 +696,22 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 16; j += 16 )
for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
{
v_int16x8 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 8 );
v_int16 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_int16::nlanes );
v0 = ( v0 <= thresh8 ) & v0;
v1 = ( v1 <= thresh8 ) & v1;
v_store( dst + j, v0 );
v_store( dst + j + 8, v1 );
v_store( dst + j + v_int16::nlanes, v1 );
}
if( j <= roi.width - v_int16::nlanes )
{
v_int16 v0 = vx_load( src + j );
v0 = ( v0 <= thresh8 ) & v0;
v_store( dst + j, v0 );
j += v_int16::nlanes;
}
for( ; j < roi.width; j++ )
@ -660,12 +721,9 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
default:
CV_Error( CV_StsBadArg, "" ); return;
}
}
else
#endif
{
#else
threshGeneric<short>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
}
#endif
}
@ -719,13 +777,10 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
}
#endif
#if CV_SIMD128
bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
if( useSIMD )
{
#if CV_SIMD
int i, j;
v_float32x4 thresh4 = v_setall_f32( thresh );
v_float32x4 maxval4 = v_setall_f32( maxval );
v_float32 thresh4 = vx_setall_f32( thresh );
v_float32 maxval4 = vx_setall_f32( maxval );
switch( type )
{
@ -733,17 +788,25 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 8; j += 8 )
for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
{
v_float32x4 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 4 );
v_float32 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float32::nlanes );
v0 = thresh4 < v0;
v1 = thresh4 < v1;
v0 = v0 & maxval4;
v1 = v1 & maxval4;
v_store( dst + j, v0 );
v_store( dst + j + 4, v1 );
v_store( dst + j + v_float32::nlanes, v1 );
}
if( j <= roi.width - v_float32::nlanes )
{
v_float32 v0 = vx_load( src + j );
v0 = thresh4 < v0;
v0 = v0 & maxval4;
v_store( dst + j, v0 );
j += v_float32::nlanes;
}
for( ; j < roi.width; j++ )
@ -755,17 +818,25 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 8; j += 8 )
for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
{
v_float32x4 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 4 );
v_float32 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float32::nlanes );
v0 = v0 <= thresh4;
v1 = v1 <= thresh4;
v0 = v0 & maxval4;
v1 = v1 & maxval4;
v_store( dst + j, v0 );
v_store( dst + j + 4, v1 );
v_store( dst + j + v_float32::nlanes, v1 );
}
if( j <= roi.width - v_float32::nlanes )
{
v_float32 v0 = vx_load( src + j );
v0 = v0 <= thresh4;
v0 = v0 & maxval4;
v_store( dst + j, v0 );
j += v_float32::nlanes;
}
for( ; j < roi.width; j++ )
@ -777,15 +848,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 8; j += 8 )
for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
{
v_float32x4 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 4 );
v_float32 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float32::nlanes );
v0 = v_min( v0, thresh4 );
v1 = v_min( v1, thresh4 );
v_store( dst + j, v0 );
v_store( dst + j + 4, v1 );
v_store( dst + j + v_float32::nlanes, v1 );
}
if( j <= roi.width - v_float32::nlanes )
{
v_float32 v0 = vx_load( src + j );
v0 = v_min( v0, thresh4 );
v_store( dst + j, v0 );
j += v_float32::nlanes;
}
for( ; j < roi.width; j++ )
@ -797,15 +875,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 8; j += 8 )
for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
{
v_float32x4 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 4 );
v_float32 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float32::nlanes );
v0 = ( thresh4 < v0 ) & v0;
v1 = ( thresh4 < v1 ) & v1;
v_store( dst + j, v0 );
v_store( dst + j + 4, v1 );
v_store( dst + j + v_float32::nlanes, v1 );
}
if( j <= roi.width - v_float32::nlanes )
{
v_float32 v0 = vx_load( src + j );
v0 = ( thresh4 < v0 ) & v0;
v_store( dst + j, v0 );
j += v_float32::nlanes;
}
for( ; j < roi.width; j++ )
@ -817,15 +902,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 8; j += 8 )
for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
{
v_float32x4 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 4 );
v_float32 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float32::nlanes );
v0 = ( v0 <= thresh4 ) & v0;
v1 = ( v1 <= thresh4 ) & v1;
v_store( dst + j, v0 );
v_store( dst + j + 4, v1 );
v_store( dst + j + v_float32::nlanes, v1 );
}
if( j <= roi.width - v_float32::nlanes )
{
v_float32 v0 = vx_load( src + j );
v0 = ( v0 <= thresh4 ) & v0;
v_store( dst + j, v0 );
j += v_float32::nlanes;
}
for( ; j < roi.width; j++ )
@ -835,12 +927,9 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
default:
CV_Error( CV_StsBadArg, "" ); return;
}
}
else
#endif
{
#else
threshGeneric<float>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
}
#endif
}
static void
@ -859,13 +948,10 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
roi.height = 1;
}
#if CV_SIMD128_64F
bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
if( useSIMD )
{
#if CV_SIMD_64F
int i, j;
v_float64x2 thresh2 = v_setall_f64( thresh );
v_float64x2 maxval2 = v_setall_f64( maxval );
v_float64 thresh2 = vx_setall_f64( thresh );
v_float64 maxval2 = vx_setall_f64( maxval );
switch( type )
{
@ -873,17 +959,25 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 4; j += 4 )
for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
{
v_float64x2 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 2 );
v_float64 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float64::nlanes );
v0 = thresh2 < v0;
v1 = thresh2 < v1;
v0 = v0 & maxval2;
v1 = v1 & maxval2;
v_store( dst + j, v0 );
v_store( dst + j + 2, v1 );
v_store( dst + j + v_float64::nlanes, v1 );
}
if( j <= roi.width - v_float64::nlanes )
{
v_float64 v0 = vx_load( src + j );
v0 = thresh2 < v0;
v0 = v0 & maxval2;
v_store( dst + j, v0 );
j += v_float64::nlanes;
}
for( ; j < roi.width; j++ )
@ -895,17 +989,25 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 4; j += 4 )
for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
{
v_float64x2 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 2 );
v_float64 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float64::nlanes );
v0 = v0 <= thresh2;
v1 = v1 <= thresh2;
v0 = v0 & maxval2;
v1 = v1 & maxval2;
v_store( dst + j, v0 );
v_store( dst + j + 2, v1 );
v_store( dst + j + v_float64::nlanes, v1 );
}
if( j <= roi.width - v_float64::nlanes )
{
v_float64 v0 = vx_load( src + j );
v0 = v0 <= thresh2;
v0 = v0 & maxval2;
v_store( dst + j, v0 );
j += v_float64::nlanes;
}
for( ; j < roi.width; j++ )
@ -917,15 +1019,22 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 4; j += 4 )
for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
{
v_float64x2 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 2 );
v_float64 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float64::nlanes );
v0 = v_min( v0, thresh2 );
v1 = v_min( v1, thresh2 );
v_store( dst + j, v0 );
v_store( dst + j + 2, v1 );
v_store( dst + j + v_float64::nlanes, v1 );
}
if( j <= roi.width - v_float64::nlanes )
{
v_float64 v0 = vx_load( src + j );
v0 = v_min( v0, thresh2 );
v_store( dst + j, v0 );
j += v_float64::nlanes;
}
for( ; j < roi.width; j++ )
@ -937,15 +1046,22 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 4; j += 4 )
for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
{
v_float64x2 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 2 );
v_float64 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float64::nlanes );
v0 = ( thresh2 < v0 ) & v0;
v1 = ( thresh2 < v1 ) & v1;
v_store( dst + j, v0 );
v_store( dst + j + 2, v1 );
v_store( dst + j + v_float64::nlanes, v1 );
}
if( j <= roi.width - v_float64::nlanes )
{
v_float64 v0 = vx_load( src + j );
v0 = ( thresh2 < v0 ) & v0;
v_store( dst + j, v0 );
j += v_float64::nlanes;
}
for( ; j < roi.width; j++ )
@ -957,15 +1073,22 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
{
j = 0;
for( ; j <= roi.width - 4; j += 4 )
for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
{
v_float64x2 v0, v1;
v0 = v_load( src + j );
v1 = v_load( src + j + 2 );
v_float64 v0, v1;
v0 = vx_load( src + j );
v1 = vx_load( src + j + v_float64::nlanes );
v0 = ( v0 <= thresh2 ) & v0;
v1 = ( v1 <= thresh2 ) & v1;
v_store( dst + j, v0 );
v_store( dst + j + 2, v1 );
v_store( dst + j + v_float64::nlanes, v1 );
}
if( j <= roi.width - v_float64::nlanes )
{
v_float64 v0 = vx_load( src + j );
v0 = ( v0 <= thresh2 ) & v0;
v_store( dst + j, v0 );
j += v_float64::nlanes;
}
for( ; j < roi.width; j++ )
@ -975,12 +1098,9 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
default:
CV_Error(CV_StsBadArg, ""); return;
}
}
else
#endif
{
#else
threshGeneric<double>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
}
#endif
}
#ifdef HAVE_IPP

@ -3062,4 +3062,14 @@ TEST(ImgProc_BGR2RGBA, 3ch24ch)
EXPECT_DOUBLE_EQ(cvtest::norm(expected - dst, NORM_INF), 0.);
}
TEST(ImgProc_RGB2YUV, regression_13668)
{
Mat src(Size(32, 4), CV_8UC3, Scalar(9, 250, 82)); // Ensure that SIMD code path works
Mat dst;
cvtColor(src, dst, COLOR_RGB2YUV);
Vec3b res = dst.at<Vec3b>(0, 0);
Vec3b ref(159, 90, 0);
EXPECT_EQ(res, ref);
}
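// For reference, a worked check (not part of the patch): with the coefficients
// OpenCV uses for 8-bit RGB->YUV (Y = 0.299*R + 0.587*G + 0.114*B,
// U = 0.492*(B - Y) + 128, V = 0.877*(R - Y) + 128, saturated to [0, 255]),
// the input (9, 250, 82) gives Y = 158.8 -> 159, U = 90.2 -> 90, and
// V = -3.4, which saturates to 0, matching ref(159, 90, 0).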
}} // namespace

@ -387,7 +387,7 @@ bool QRDetect::computeTransformationPoints()
findNonZero(mask_roi, non_zero_elem[i]);
newHull.insert(newHull.end(), non_zero_elem[i].begin(), non_zero_elem[i].end());
}
convexHull(Mat(newHull), locations);
convexHull(newHull, locations);
for (size_t i = 0; i < locations.size(); i++)
{
for (size_t j = 0; j < 3; j++)
@ -556,7 +556,7 @@ vector<Point2f> QRDetect::getQuadrilateral(vector<Point2f> angle_list)
}
vector<Point> integer_hull;
convexHull(Mat(locations), integer_hull);
convexHull(locations, integer_hull);
int hull_size = (int)integer_hull.size();
vector<Point2f> hull(hull_size);
for (int i = 0; i < hull_size; i++)
@ -901,7 +901,7 @@ bool QRDecode::versionDefinition()
vector<Point> locations, non_zero_elem;
Mat mask_roi = mask(Range(1, intermediate.rows - 1), Range(1, intermediate.cols - 1));
findNonZero(mask_roi, non_zero_elem);
convexHull(Mat(non_zero_elem), locations);
convexHull(non_zero_elem, locations);
Point offset = computeOffset(locations);
Point temp_remote = locations[0], remote_point;

@ -646,8 +646,6 @@ private:
Size size = frame0.size();
UMat prevFlowX, prevFlowY, curFlowX, curFlowY;
flowx.create(size, CV_32F);
flowy.create(size, CV_32F);
UMat flowx0 = flowx;
UMat flowy0 = flowy;
@ -1075,12 +1073,19 @@ private:
return false;
std::vector<UMat> flowar;
if (!_flow0.empty())
// If flag is set, check for integrity; if not set, allocate memory space
if (flags_ & OPTFLOW_USE_INITIAL_FLOW)
{
if (_flow0.empty() || _flow0.size() != _prev0.size() || _flow0.channels() != 2 ||
_flow0.depth() != CV_32F)
return false;
split(_flow0, flowar);
}
else
{
flowar.push_back(UMat());
flowar.push_back(UMat());
flowar.push_back(UMat(_prev0.size(), CV_32FC1));
flowar.push_back(UMat(_prev0.size(), CV_32FC1));
}
if(!this->operator()(_prev0.getUMat(), _next0.getUMat(), flowar[0], flowar[1])){
return false;
@ -1112,7 +1117,14 @@ void FarnebackOpticalFlowImpl::calc(InputArray _prev0, InputArray _next0,
CV_Assert( prev0.size() == next0.size() && prev0.channels() == next0.channels() &&
prev0.channels() == 1 && pyrScale_ < 1 );
// If flag is set, check for integrity; if not set, allocate memory space
if( flags_ & OPTFLOW_USE_INITIAL_FLOW )
CV_Assert( _flow0.size() == prev0.size() && _flow0.channels() == 2 &&
_flow0.depth() == CV_32F );
else
_flow0.create( prev0.size(), CV_32FC2 );
Mat flow0 = _flow0.getMat();
for( k = 0, scale = 1; k < levels; k++ )

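// A hedged usage sketch of the behavior fixed above: with OPTFLOW_USE_INITIAL_FLOW
// the caller must pass a valid CV_32FC2 flow of the input size (it seeds the
// estimate); without the flag the flow is (re)allocated. Helper name hypothetical.
//
//     #include <opencv2/video.hpp>
//
//     void flow_with_warm_start(const cv::Mat& prev, const cv::Mat& next, cv::Mat& flow)
//     {
//         // 'flow' must already be prev.size(), CV_32FC2
//         // (e.g. carried over from the previous frame pair)
//         cv::calcOpticalFlowFarneback(prev, next, flow,
//                                      /*pyr_scale=*/0.5, /*levels=*/3, /*winsize=*/15,
//                                      /*iterations=*/3, /*poly_n=*/5, /*poly_sigma=*/1.2,
//                                      cv::OPTFLOW_USE_INITIAL_FLOW);
//     }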
@ -90,6 +90,18 @@ squeezenet:
classes: "classification_classes_ILSVRC2012.txt"
sample: "classification"
# Googlenet from https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet
googlenet:
model: "bvlc_googlenet.caffemodel"
config: "bvlc_googlenet.prototxt"
mean: [104, 117, 123]
scale: 1.0
width: 224
height: 224
rgb: false
classes: "classification_classes_ILSVRC2012.txt"
sample: "classification"
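# A hedged sketch (not part of the patch, helper name hypothetical) of how these
# fields are expected to map onto dnn preprocessing: mean/scale/width/height feed
# blobFromImage, and rgb: false means the model wants BGR input, i.e. swapRB
# stays false for BGR frames.
#
#     #include <opencv2/dnn.hpp>
#
#     cv::Mat preprocess_googlenet(const cv::Mat& bgr_frame)
#     {
#         return cv::dnn::blobFromImage(bgr_frame, /*scalefactor=*/1.0,
#                                       /*size=*/cv::Size(224, 224),
#                                       /*mean=*/cv::Scalar(104, 117, 123),
#                                       /*swapRB=*/false);
#     }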
################################################################################
# Semantic segmentation models.
################################################################################

@ -289,7 +289,7 @@ def removeUnusedNodesAndAttrs(to_remove, graph_def):
op = graph_def.node[i].op
name = graph_def.node[i].name
if op == 'Const' or to_remove(name, op):
if to_remove(name, op):
if op != 'Const':
removedNodes.append(name)

@ -48,10 +48,42 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
removeIdentity(graph_def)
nodesToKeep = []
def to_remove(name, op):
return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
if name in nodesToKeep:
return False
return op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
(name.startswith('CropAndResize') and op != 'CropAndResize')
# Fuse atrous convolutions (with dilations).
nodesMap = {node.name: node for node in graph_def.node}
for node in reversed(graph_def.node):
if node.op == 'BatchToSpaceND':
del node.input[2]
conv = nodesMap[node.input[0]]
spaceToBatchND = nodesMap[conv.input[0]]
# Extract paddings
stridedSlice = nodesMap[spaceToBatchND.input[2]]
assert(stridedSlice.op == 'StridedSlice')
pack = nodesMap[stridedSlice.input[0]]
assert(pack.op == 'Pack')
padNodeH = nodesMap[nodesMap[pack.input[0]].input[0]]
padNodeW = nodesMap[nodesMap[pack.input[1]].input[0]]
padH = int(padNodeH.attr['value']['tensor'][0]['int_val'][0])
padW = int(padNodeW.attr['value']['tensor'][0]['int_val'][0])
paddingsNode = NodeDef()
paddingsNode.name = conv.name + '/paddings'
paddingsNode.op = 'Const'
paddingsNode.addAttr('value', [padH, padH, padW, padW])
graph_def.node.insert(graph_def.node.index(spaceToBatchND), paddingsNode)
nodesToKeep.append(paddingsNode.name)
spaceToBatchND.input[2] = paddingsNode.name
removeUnusedNodesAndAttrs(to_remove, graph_def)
@ -225,6 +257,26 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
detectionOut.addAttr('variance_encoded_in_target', True)
graph_def.node.extend([detectionOut])
def getUnconnectedNodes():
unconnected = [node.name for node in graph_def.node]
for node in graph_def.node:
for inp in node.input:
if inp in unconnected:
unconnected.remove(inp)
return unconnected
while True:
unconnectedNodes = getUnconnectedNodes()
unconnectedNodes.remove(detectionOut.name)
if not unconnectedNodes:
break
for name in unconnectedNodes:
for i in range(len(graph_def.node)):
if graph_def.node[i].name == name:
del graph_def.node[i]
break
# Save as text.
graph_def.save(outputPath)

@ -55,7 +55,7 @@ graph_def = parseTextGraph(args.output)
removeIdentity(graph_def)
def to_remove(name, op):
return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
return op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
(name.startswith('CropAndResize') and op != 'CropAndResize')
removeUnusedNodesAndAttrs(to_remove, graph_def)

@ -10,14 +10,60 @@
# Then you can import it with a binary frozen graph (.pb) using readNetFromTensorflow() function.
# See details and examples on the following wiki page: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
import argparse
import re
from math import sqrt
from tf_text_graph_common import *
class SSDAnchorGenerator:
def __init__(self, min_scale, max_scale, num_layers, aspect_ratios,
reduce_boxes_in_lowest_layer, image_width, image_height):
self.min_scale = min_scale
self.aspect_ratios = aspect_ratios
self.reduce_boxes_in_lowest_layer = reduce_boxes_in_lowest_layer
self.image_width = image_width
self.image_height = image_height
self.scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
for i in range(num_layers)] + [1.0]
def get(self, layer_id):
if layer_id == 0 and self.reduce_boxes_in_lowest_layer:
widths = [0.1, self.min_scale * sqrt(2.0), self.min_scale * sqrt(0.5)]
heights = [0.1, self.min_scale / sqrt(2.0), self.min_scale / sqrt(0.5)]
else:
widths = [self.scales[layer_id] * sqrt(ar) for ar in self.aspect_ratios]
heights = [self.scales[layer_id] / sqrt(ar) for ar in self.aspect_ratios]
widths += [sqrt(self.scales[layer_id] * self.scales[layer_id + 1])]
heights += [sqrt(self.scales[layer_id] * self.scales[layer_id + 1])]
widths = [w * self.image_width for w in widths]
heights = [h * self.image_height for h in heights]
return widths, heights
class MultiscaleAnchorGenerator:
def __init__(self, min_level, aspect_ratios, scales_per_octave, anchor_scale):
self.min_level = min_level
self.aspect_ratios = aspect_ratios
self.anchor_scale = anchor_scale
self.scales = [2**(float(s) / scales_per_octave) for s in range(scales_per_octave)]
def get(self, layer_id):
widths = []
heights = []
for a in self.aspect_ratios:
for s in self.scales:
base_anchor_size = 2**(self.min_level + layer_id) * self.anchor_scale
ar = sqrt(a)
heights.append(base_anchor_size * s / ar)
widths.append(base_anchor_size * s * ar)
return widths, heights
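# For reference, a restatement (not part of the patch) of the box sizes the two
# generators above produce:
#
# SSDAnchorGenerator, layer i with aspect ratio ar on a W x H input:
#     s_i = min_scale + (max_scale - min_scale) * i / (num_layers - 1)
#     width = s_i * sqrt(ar) * W,   height = (s_i / sqrt(ar)) * H
#     plus one extra unit-aspect box with scale sqrt(s_i * s_{i+1}); layer 0 instead
#     uses the fixed trio based on 0.1 and min_scale when reduce_boxes_in_lowest_layer
#     is set.
#
# MultiscaleAnchorGenerator, layer i with aspect ratio a and octave scale
# s = 2**(k / scales_per_octave):
#     base = 2**(min_level + i) * anchor_scale
#     width = base * s * sqrt(a),   height = base * s / sqrt(a)   (in pixels, not normalized)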
def createSSDGraph(modelPath, configPath, outputPath):
# Nodes that should be kept.
keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu6', 'Placeholder', 'FusedBatchNorm',
keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu', 'Relu6', 'Placeholder', 'FusedBatchNorm',
'DepthwiseConv2dNative', 'ConcatV2', 'Mul', 'MaxPool', 'AvgPool', 'Identity',
'Sub']
'Sub', 'ResizeNearestNeighbor', 'Pad']
# Node with which prefixes should be removed
prefixesToRemove = ('MultipleGridAnchorGenerator/', 'Postprocessor/', 'Preprocessor/map')
@ -27,7 +73,15 @@ def createSSDGraph(modelPath, configPath, outputPath):
config = config['model'][0]['ssd'][0]
num_classes = int(config['num_classes'][0])
ssd_anchor_generator = config['anchor_generator'][0]['ssd_anchor_generator'][0]
fixed_shape_resizer = config['image_resizer'][0]['fixed_shape_resizer'][0]
image_width = int(fixed_shape_resizer['width'][0])
image_height = int(fixed_shape_resizer['height'][0])
box_predictor = 'convolutional' if 'convolutional_box_predictor' in config['box_predictor'][0] else 'weight_shared_convolutional'
anchor_generator = config['anchor_generator'][0]
if 'ssd_anchor_generator' in anchor_generator:
ssd_anchor_generator = anchor_generator['ssd_anchor_generator'][0]
min_scale = float(ssd_anchor_generator['min_scale'][0])
max_scale = float(ssd_anchor_generator['max_scale'][0])
num_layers = int(ssd_anchor_generator['num_layers'][0])
@ -35,18 +89,34 @@ def createSSDGraph(modelPath, configPath, outputPath):
reduce_boxes_in_lowest_layer = True
if 'reduce_boxes_in_lowest_layer' in ssd_anchor_generator:
reduce_boxes_in_lowest_layer = ssd_anchor_generator['reduce_boxes_in_lowest_layer'][0] == 'true'
priors_generator = SSDAnchorGenerator(min_scale, max_scale, num_layers,
aspect_ratios, reduce_boxes_in_lowest_layer,
image_width, image_height)
fixed_shape_resizer = config['image_resizer'][0]['fixed_shape_resizer'][0]
image_width = int(fixed_shape_resizer['width'][0])
image_height = int(fixed_shape_resizer['height'][0])
box_predictor = 'convolutional' if 'convolutional_box_predictor' in config['box_predictor'][0] else 'weight_shared_convolutional'
print('Number of classes: %d' % num_classes)
print('Number of layers: %d' % num_layers)
print('Scale: [%f-%f]' % (min_scale, max_scale))
print('Aspect ratios: %s' % str(aspect_ratios))
print('Reduce boxes in the lowest layer: %s' % str(reduce_boxes_in_lowest_layer))
elif 'multiscale_anchor_generator' in anchor_generator:
multiscale_anchor_generator = anchor_generator['multiscale_anchor_generator'][0]
min_level = int(multiscale_anchor_generator['min_level'][0])
max_level = int(multiscale_anchor_generator['max_level'][0])
anchor_scale = float(multiscale_anchor_generator['anchor_scale'][0])
aspect_ratios = [float(ar) for ar in multiscale_anchor_generator['aspect_ratios']]
scales_per_octave = int(multiscale_anchor_generator['scales_per_octave'][0])
num_layers = max_level - min_level + 1
priors_generator = MultiscaleAnchorGenerator(min_level, aspect_ratios,
scales_per_octave, anchor_scale)
print('Levels: [%d-%d]' % (min_level, max_level))
print('Anchor scale: %f' % anchor_scale)
print('Scales per octave: %d' % scales_per_octave)
print('Aspect ratios: %s' % str(aspect_ratios))
else:
print('Unknown anchor_generator')
exit(0)
print('Number of classes: %d' % num_classes)
print('Number of layers: %d' % num_layers)
print('box predictor: %s' % box_predictor)
print('Input image size: %dx%d' % (image_width, image_height))
@ -67,8 +137,8 @@ def createSSDGraph(modelPath, configPath, outputPath):
return unconnected
def fuse_nodes(nodesToKeep):
# Detect unfused batch normalization nodes and fuse them.
def fuse_batch_normalization():
# Add_0 <-- moving_variance, add_y
# Rsqrt <-- Add_0
# Mul_0 <-- Rsqrt, gamma
@ -77,9 +147,15 @@ def createSSDGraph(modelPath, configPath, outputPath):
# Sub_0 <-- beta, Mul_2
# Add_1 <-- Mul_1, Sub_0
nodesMap = {node.name: node for node in graph_def.node}
subgraph = ['Add',
subgraphBatchNorm = ['Add',
['Mul', 'input', ['Mul', ['Rsqrt', ['Add', 'moving_variance', 'add_y']], 'gamma']],
['Sub', 'beta', ['Mul', 'moving_mean', 'Mul_0']]]
# Detect unfused nearest neighbor resize.
subgraphResizeNN = ['Reshape',
['Mul', ['Reshape', 'input', ['Pack', 'shape_1', 'shape_2', 'shape_3', 'shape_4', 'shape_5']],
'ones'],
['Pack', ['StridedSlice', ['Shape', 'input'], 'stack', 'stack_1', 'stack_2'],
'out_height', 'out_width', 'out_channels']]
def checkSubgraph(node, targetNode, inputs, fusedNodes):
op = targetNode[0]
if node.op == op and (len(node.input) >= len(targetNode) - 1):
@ -100,7 +176,7 @@ def createSSDGraph(modelPath, configPath, outputPath):
for node in graph_def.node:
inputs = {}
fusedNodes = []
if checkSubgraph(node, subgraph, inputs, fusedNodes):
if checkSubgraph(node, subgraphBatchNorm, inputs, fusedNodes):
name = node.name
node.Clear()
node.name = name
@ -112,15 +188,41 @@ def createSSDGraph(modelPath, configPath, outputPath):
node.input.append(inputs['moving_variance'])
node.addAttr('epsilon', 0.001)
nodesToRemove += fusedNodes[1:]
inputs = {}
fusedNodes = []
if checkSubgraph(node, subgraphResizeNN, inputs, fusedNodes):
name = node.name
node.Clear()
node.name = name
node.op = 'ResizeNearestNeighbor'
node.input.append(inputs['input'])
node.input.append(name + '/output_shape')
out_height_node = nodesMap[inputs['out_height']]
out_width_node = nodesMap[inputs['out_width']]
out_height = int(out_height_node.attr['value']['tensor'][0]['int_val'][0])
out_width = int(out_width_node.attr['value']['tensor'][0]['int_val'][0])
shapeNode = NodeDef()
shapeNode.name = name + '/output_shape'
shapeNode.op = 'Const'
shapeNode.addAttr('value', [out_height, out_width])
graph_def.node.insert(graph_def.node.index(node), shapeNode)
nodesToKeep.append(shapeNode.name)
nodesToRemove += fusedNodes[1:]
for node in nodesToRemove:
graph_def.node.remove(node)
fuse_batch_normalization()
nodesToKeep = []
fuse_nodes(nodesToKeep)
removeIdentity(graph_def)
def to_remove(name, op):
return (not op in keepOps) or name.startswith(prefixesToRemove)
return (not name in nodesToKeep) and \
(op == 'Const' or (not op in keepOps) or name.startswith(prefixesToRemove))
removeUnusedNodesAndAttrs(to_remove, graph_def)
@ -169,19 +271,15 @@ def createSSDGraph(modelPath, configPath, outputPath):
graph_def.node.extend([flatten])
addConcatNode('%s/concat' % label, concatInputs, 'concat/axis_flatten')
idx = 0
num_matched_layers = 0
for node in graph_def.node:
if node.name == ('BoxPredictor_%d/BoxEncodingPredictor/Conv2D' % idx) or \
node.name == ('WeightSharedConvolutionalBoxPredictor_%d/BoxPredictor/Conv2D' % idx) or \
node.name == 'WeightSharedConvolutionalBoxPredictor/BoxPredictor/Conv2D':
if re.match('BoxPredictor_\d/BoxEncodingPredictor/Conv2D', node.name) or \
re.match('WeightSharedConvolutionalBoxPredictor(_\d)*/BoxPredictor/Conv2D', node.name):
node.addAttr('loc_pred_transposed', True)
idx += 1
assert(idx == num_layers)
num_matched_layers += 1
assert(num_matched_layers == num_layers)
# Add layers that generate anchors (bounding boxes proposals).
scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
for i in range(num_layers)] + [1.0]
priorBoxes = []
for i in range(num_layers):
priorBox = NodeDef()
@ -199,17 +297,8 @@ def createSSDGraph(modelPath, configPath, outputPath):
priorBox.addAttr('flip', False)
priorBox.addAttr('clip', False)
if i == 0 and reduce_boxes_in_lowest_layer:
widths = [0.1, min_scale * sqrt(2.0), min_scale * sqrt(0.5)]
heights = [0.1, min_scale / sqrt(2.0), min_scale / sqrt(0.5)]
else:
widths = [scales[i] * sqrt(ar) for ar in aspect_ratios]
heights = [scales[i] / sqrt(ar) for ar in aspect_ratios]
widths, heights = priors_generator.get(i)
widths += [sqrt(scales[i] * scales[i + 1])]
heights += [sqrt(scales[i] * scales[i + 1])]
widths = [w * image_width for w in widths]
heights = [h * image_height for h in heights]
priorBox.addAttr('width', widths)
priorBox.addAttr('height', heights)
priorBox.addAttr('variance', [0.1, 0.1, 0.2, 0.2])
@ -217,6 +306,7 @@ def createSSDGraph(modelPath, configPath, outputPath):
graph_def.node.extend([priorBox])
priorBoxes.append(priorBox.name)
# Compare this layer's output with Postprocessor/Reshape
addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten')
# Sigmoid for classes predictions and DetectionOutput layer
