Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/17539/head
Alexander Alekhin 5 years ago
commit 0cbaaba4b1
Changed files (lines changed):
  1. cmake/OpenCVDetectInferenceEngine.cmake (4)
  2. cmake/OpenCVFindMKL.cmake (20)
  3. doc/tutorials/introduction/macos_install/macos_install.markdown (8)
  4. modules/core/include/opencv2/core/eigen.hpp (12)
  5. modules/dnn/src/dnn.cpp (2)
  6. modules/dnn/src/ie_ngraph.cpp (8)
  7. modules/dnn/src/op_inf_engine.hpp (7)
  8. samples/dnn/text_detection.py (129)

@@ -129,9 +129,9 @@ endif()
 if(INF_ENGINE_TARGET)
   if(NOT INF_ENGINE_RELEASE)
-    message(WARNING "InferenceEngine version has not been set, 2020.2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+    message(WARNING "InferenceEngine version has not been set, 2020.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
   endif()
-  set(INF_ENGINE_RELEASE "2020020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
+  set(INF_ENGINE_RELEASE "2020030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
   set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
     INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
   )

@@ -79,9 +79,10 @@ get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
 #determine arch
 if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
-    set(MKL_X64 1)
-    set(MKL_ARCH "intel64")
+    set(MKL_ARCH_LIST "intel64")
+    if(MSVC)
+        list(APPEND MKL_ARCH_LIST "win-x64")
+    endif()
     include(CheckTypeSize)
     CHECK_TYPE_SIZE(int _sizeof_int)
     if (_sizeof_int EQUAL 4)
@@ -90,14 +91,19 @@ if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
         set(MKL_ARCH_SUFFIX "ilp64")
     endif()
 else()
-    set(MKL_ARCH "ia32")
+    set(MKL_ARCH_LIST "ia32")
     set(MKL_ARCH_SUFFIX "c")
 endif()

 if(MKL_VERSION_STR VERSION_GREATER "11.3.0" OR MKL_VERSION_STR VERSION_EQUAL "11.3.0")
     set(mkl_lib_find_paths
-        ${MKL_ROOT_DIR}/lib
-        ${MKL_ROOT_DIR}/lib/${MKL_ARCH} ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH})
+        ${MKL_ROOT_DIR}/lib)
+    foreach(MKL_ARCH ${MKL_ARCH_LIST})
+        list(APPEND mkl_lib_find_paths
+            ${MKL_ROOT_DIR}/lib/${MKL_ARCH}
+            ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}
+            ${MKL_ROOT_DIR}/${MKL_ARCH})
+    endforeach()
     set(mkl_lib_list "mkl_intel_${MKL_ARCH_SUFFIX}")
@@ -121,7 +127,7 @@ endif()
 set(MKL_LIBRARIES "")
 foreach(lib ${mkl_lib_list})
-    find_library(${lib} ${lib} ${mkl_lib_find_paths})
+    find_library(${lib} NAMES ${lib} ${lib}_dll HINTS ${mkl_lib_find_paths})
     mark_as_advanced(${lib})
     if(NOT ${lib})
         mkl_fail()

@@ -30,7 +30,7 @@ Installing CMake
 -# Install the dmg package and launch it from Applications. That will give you the UI app of CMake
--# From the CMake app window, choose menu Tools --> Install For Command Line Use.
+-# From the CMake app window, choose menu Tools --> How to Install For Command Line Use. Then, follow the instructions from the pop-up there.
 -# Install folder will be /usr/bin/ by default, submit it by choosing Install command line links.

@@ -66,7 +66,7 @@ git clone https://github.com/opencv/opencv_contrib.git
 Building OpenCV from Source Using CMake
 ---------------------------------------
--# Create a temporary directory, which we denote as `<cmake_build_dir>`, where you want to put
+-# Create a temporary directory, which we denote as `build_opencv`, where you want to put
    the generated Makefiles, project files as well the object files and output binaries and enter
    there.

@@ -87,8 +87,8 @@ Building OpenCV from Source Using CMake
    or cmake-gui
-   - set full path to OpenCV source code, e.g. `/home/user/opencv`
-   - set full path to `<cmake_build_dir>`, e.g. `/home/user/build_opencv`
+   - set the OpenCV source code path to, e.g. `/home/user/opencv`
+   - set the binary build path to your CMake build directory, e.g. `/home/user/build_opencv`
    - set optional parameters
    - run: "Configure"
    - run: "Generate"

@@ -66,10 +66,18 @@
 namespace cv
 {

-//! @addtogroup core_eigen
+/** @addtogroup core_eigen
+These functions are provided for OpenCV-Eigen interoperability. They convert `Mat`
+objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen
+documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for
+information about the `Matrix` template type.
+
+@note Using these functions requires the `Eigen/Dense` or similar header to be
+included before this header.
+*/
 //! @{

-#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
+#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN)
 /** @brief Converts an Eigen::Tensor to a cv::Mat.

 The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:

@@ -2248,7 +2248,7 @@ struct Net::Impl : public detail::NetImplBase
                 auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
                 CV_Assert(oid < ieInpNode->node->get_output_size());
-#if INF_ENGINE_VER_MAJOR_GT(2020030000)
+#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
                 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
 #else
                 inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));

@@ -82,7 +82,7 @@ public:
         return type_info;
     }

-#if INF_ENGINE_VER_MAJOR_GT(2020020000)
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3)
     NgraphCustomOp(const ngraph::OutputVector& inputs,
 #else
     NgraphCustomOp(const ngraph::NodeVector& inputs,

@@ -112,7 +112,7 @@ public:
     std::shared_ptr<ngraph::Node> copy_with_new_args(const ngraph::NodeVector& new_args) const override
     {
-#if INF_ENGINE_VER_MAJOR_GT(2020020000)
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3)
         return std::make_shared<NgraphCustomOp>(ngraph::as_output_vector(new_args), params);
 #else
         return std::make_shared<NgraphCustomOp>(new_args, params);

@@ -239,7 +239,9 @@ private:
 class InfEngineNgraphExtension : public InferenceEngine::IExtension
 {
 public:
+#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
     virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {}
+#endif
     virtual void Unload() noexcept {}
     virtual void Release() noexcept {}
     virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {}

@@ -283,7 +285,7 @@ InfEngineNgraphNode::InfEngineNgraphNode(const std::vector<Ptr<BackendNode> >& n
         {"internals", shapesToStr(internals)}
     };
-#if INF_ENGINE_VER_MAJOR_GT(2020020000)
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3)
     ngraph::OutputVector inp_nodes;
 #else
     ngraph::NodeVector inp_nodes;

@@ -25,10 +25,11 @@
 #define INF_ENGINE_RELEASE_2019R3 2019030000
 #define INF_ENGINE_RELEASE_2020_1 2020010000
 #define INF_ENGINE_RELEASE_2020_2 2020020000
+#define INF_ENGINE_RELEASE_2020_3 2020030000

 #ifndef INF_ENGINE_RELEASE
-#warning("IE version have not been provided via command-line. Using 2020.2 by default")
-#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_2
+#warning("IE version have not been provided via command-line. Using 2020.3 by default")
+#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_3
 #endif

 #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
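Note on the version scheme touched here: INF_ENGINE_RELEASE packs the version as YYYYAABBCC, and the INF_ENGINE_VER_MAJOR_* macros integer-divide by 10000 so that only the year/major/minor digits take part in the comparison. A minimal sketch of that arithmetic in Python (the names mirror the C macros; this is illustration, not part of the patch):

    INF_ENGINE_RELEASE_2020_2 = 2020020000
    INF_ENGINE_RELEASE_2020_3 = 2020030000

    def ver_major_gt(release, ver):
        # mirrors INF_ENGINE_VER_MAJOR_GT: dividing by 10000 strips the CC patch digits
        return (release // 10000) > (ver // 10000)

    def ver_major_ge(release, ver):
        # mirrors INF_ENGINE_VER_MAJOR_GE
        return (release // 10000) >= (ver // 10000)

    assert ver_major_gt(INF_ENGINE_RELEASE_2020_3, INF_ENGINE_RELEASE_2020_2)  # 202003 > 202002
    assert ver_major_ge(INF_ENGINE_RELEASE_2020_3, INF_ENGINE_RELEASE_2020_3)  # 202003 >= 202003
    assert not ver_major_gt(2020030002, INF_ENGINE_RELEASE_2020_3)             # patch digits ignored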
@@ -226,7 +227,9 @@ private:
 class InfEngineExtension : public InferenceEngine::IExtension
 {
 public:
+#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
     virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {}
+#endif
     virtual void Unload() noexcept {}
     virtual void Release() noexcept {}
     virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {}

@ -1,25 +1,81 @@
'''
Text detection model: https://github.com/argman/EAST
Download link: https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1
Text recognition model taken from here: https://github.com/meijieru/crnn.pytorch
How to convert from pb to onnx:
Using classes from here: https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py
import torch
import models.crnn as CRNN
model = CRNN(32, 1, 37, 256)
model.load_state_dict(torch.load('crnn.pth'))
dummy_input = torch.randn(1, 1, 32, 100)
torch.onnx.export(model, dummy_input, "crnn.onnx", verbose=True)
'''
# Import required modules # Import required modules
import numpy as np
import cv2 as cv import cv2 as cv
import math import math
import argparse import argparse
############ Add argument parser for command line arguments ############ ############ Add argument parser for command line arguments ############
parser = argparse.ArgumentParser(description='Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)') parser = argparse.ArgumentParser(
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') description="Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of "
parser.add_argument('--model', required=True, "EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)"
help='Path to a binary .pb file of model contains trained weights.') "The OCR model can be obtained from converting the pretrained CRNN model to .onnx format from the github repository https://github.com/meijieru/crnn.pytorch")
parser.add_argument('--input',
help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--model', '-m', required=True,
help='Path to a binary .pb file contains trained detector network.')
parser.add_argument('--ocr', default="crnn.onnx",
help="Path to a binary .pb or .onnx file contains trained recognition network", )
parser.add_argument('--width', type=int, default=320, parser.add_argument('--width', type=int, default=320,
help='Preprocess input image by resizing to a specific width. It should be multiple by 32.') help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
parser.add_argument('--height',type=int, default=320, parser.add_argument('--height', type=int, default=320,
help='Preprocess input image by resizing to a specific height. It should be multiple by 32.') help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
parser.add_argument('--thr',type=float, default=0.5, parser.add_argument('--thr', type=float, default=0.5,
help='Confidence threshold.') help='Confidence threshold.')
parser.add_argument('--nms',type=float, default=0.4, parser.add_argument('--nms', type=float, default=0.4,
help='Non-maximum suppression threshold.') help='Non-maximum suppression threshold.')
args = parser.parse_args() args = parser.parse_args()
############ Utility functions ############ ############ Utility functions ############
def decode(scores, geometry, scoreThresh):
def fourPointsTransform(frame, vertices):
vertices = np.asarray(vertices)
outputSize = (100, 32)
targetVertices = np.array([
[0, outputSize[1] - 1],
[0, 0],
[outputSize[0] - 1, 0],
[outputSize[0] - 1, outputSize[1] - 1]], dtype="float32")
rotationMatrix = cv.getPerspectiveTransform(vertices, targetVertices)
result = cv.warpPerspective(frame, rotationMatrix, outputSize)
return result
def decodeText(scores):
text = ""
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
for i in range(scores.shape[0]):
c = np.argmax(scores[i][0])
if c != 0:
text += alphabet[c - 1]
else:
text += '-'
# adjacent same letters as well as background text must be removed to get the final output
char_list = []
for i in range(len(text)):
if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
char_list.append(text[i])
return ''.join(char_list)
def decodeBoundingBoxes(scores, geometry, scoreThresh):
detections = [] detections = []
confidences = [] confidences = []
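The new decodeText() above performs a greedy CTC-style collapse of the CRNN output: per time step it takes the argmax, treats class 0 as the blank/background symbol, then drops blanks and adjacent duplicate letters. A standalone sketch with hand-made class indices (hypothetical values, not real network output):

    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"

    def greedy_ctc(indices):
        # per-step class index -> character, with 0 acting as the CTC blank ('-')
        raw = ''.join('-' if c == 0 else alphabet[c - 1] for c in indices)
        # drop blanks and adjacent duplicates, exactly as decodeText() does
        out = [ch for i, ch in enumerate(raw)
               if ch != '-' and not (i > 0 and ch == raw[i - 1])]
        return ''.join(out)

    # 'h','e','l','l',blank,'l','o' -> "hello": the blank keeps the repeated l's apart
    print(greedy_ctc([18, 15, 22, 22, 0, 22, 25]))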
@@ -47,7 +103,7 @@ def decode(scores, geometry, scoreThresh):
             score = scoresData[x]

             # If score is lower than threshold score, move to next x
-            if(score < scoreThresh):
+            if (score < scoreThresh):
                 continue

             # Calculate offset
@@ -66,24 +122,27 @@ def decode(scores, geometry, scoreThresh):
             # Find points for rectangle
             p1 = (-sinA * h + offset[0], -cosA * h + offset[1])
             p3 = (-cosA * w + offset[0], sinA * w + offset[1])
-            center = (0.5*(p1[0]+p3[0]), 0.5*(p1[1]+p3[1]))
-            detections.append((center, (w,h), -1*angle * 180.0 / math.pi))
+            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))
+            detections.append((center, (w, h), -1 * angle * 180.0 / math.pi))
             confidences.append(float(score))

     # Return detections and confidences
     return [detections, confidences]


 def main():
     # Read and store arguments
     confThreshold = args.thr
     nmsThreshold = args.nms
     inpWidth = args.width
     inpHeight = args.height
-    model = args.model
+    modelDetector = args.model
+    modelRecognition = args.ocr

     # Load network
-    net = cv.dnn.readNet(model)
+    detector = cv.dnn.readNet(modelDetector)
+    recognizer = cv.dnn.readNet(modelRecognition)

     # Create a new named window
     kWinName = "EAST: An Efficient and Accurate Scene Text Detector"
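The recognizer loaded above consumes upright 100x32 crops, which fourPointsTransform() produces by mapping the four rotated-box corners onto that patch. A minimal standalone sketch of the same getPerspectiveTransform/warpPerspective pair, assuming the cv.boxPoints corner order (bottom-left, top-left, top-right, bottom-right):

    import numpy as np
    import cv2 as cv

    frame = np.zeros((240, 320, 3), dtype=np.uint8)            # dummy input frame
    # four corners of a slightly rotated word box, in boxPoints order
    vertices = np.float32([[50, 120], [55, 80], [205, 85], [200, 125]])
    # same target corners fourPointsTransform() builds for a 100x32 output
    target = np.float32([[0, 31], [0, 0], [99, 0], [99, 31]])

    M = cv.getPerspectiveTransform(vertices, target)
    patch = cv.warpPerspective(frame, M, (100, 32))            # dsize is (width, height)
    print(patch.shape)                                         # -> (32, 100, 3)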
@@ -95,6 +154,7 @@ def main():
     # Open a video file or an image file or a camera stream
     cap = cv.VideoCapture(args.input if args.input else 0)

+    tickmeter = cv.TickMeter()
     while cv.waitKey(1) < 0:
         # Read frame
         hasFrame, frame = cap.read()
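cv.TickMeter, introduced here in place of getPerfProfile(), accumulates time across start()/stop() pairs, which is why the sample resets it after each displayed frame. A small sketch (the inner loop is a stand-in workload, not the real detector call; assumes the TickMeter Python bindings available in this OpenCV version):

    import cv2 as cv

    tm = cv.TickMeter()
    for _ in range(3):
        tm.start()
        _ = sum(i * i for i in range(100000))  # stand-in for detector.forward()
        tm.stop()

    # getTimeMilli() reports the accumulated time over all start/stop pairs
    print('total: %.2f ms over %d runs' % (tm.getTimeMilli(), tm.getCounter()))
    tm.reset()  # required before timing an unrelated section, as in the sample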
@@ -111,19 +171,20 @@ def main():
         # Create a 4D blob from frame.
         blob = cv.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)

-        # Run the model
-        net.setInput(blob)
-        outs = net.forward(outNames)
-        t, _ = net.getPerfProfile()
-        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
+        # Run the detection model
+        detector.setInput(blob)
+
+        tickmeter.start()
+        outs = detector.forward(outNames)
+        tickmeter.stop()

         # Get scores and geometry
         scores = outs[0]
         geometry = outs[1]
-        [boxes, confidences] = decode(scores, geometry, confThreshold)
+        [boxes, confidences] = decodeBoundingBoxes(scores, geometry, confThreshold)

         # Apply NMS
-        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold,nmsThreshold)
+        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
         for i in indices:
             # get 4 corners of the rotated rect
             vertices = cv.boxPoints(boxes[i[0]])
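For reference, cv.dnn.NMSBoxesRotated takes boxes in the ((cx, cy), (w, h), angle) form that decodeBoundingBoxes() emits plus a parallel confidence list, and returns the indices of the boxes that survive suppression. A sketch with hand-made boxes (values are illustrative only):

    import cv2 as cv

    # ((cx, cy), (w, h), angle) rotated rects, as decodeBoundingBoxes() returns them
    boxes = [((50.0, 50.0), (40.0, 20.0), 0.0),
             ((52.0, 51.0), (40.0, 20.0), 0.0),   # heavy overlap with the first box
             ((150.0, 90.0), (30.0, 15.0), 30.0)]
    confidences = [0.9, 0.8, 0.7]

    indices = cv.dnn.NMSBoxesRotated(boxes, confidences, 0.5, 0.4)
    # the lower-scoring near-duplicate is suppressed; depending on the OpenCV
    # version the result is a flat array or Nx1 (hence the i[0] indexing above)
    print(indices)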
@@ -131,16 +192,40 @@ def main():
             for j in range(4):
                 vertices[j][0] *= rW
                 vertices[j][1] *= rH

+            # get cropped image using perspective transform
+            if modelRecognition:
+                cropped = fourPointsTransform(frame, vertices)
+                cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)
+
+                # Create a 4D blob from cropped image
+                blob = cv.dnn.blobFromImage(cropped, size=(100, 32), mean=127.5, scalefactor=1 / 127.5)
+                recognizer.setInput(blob)
+
+                # Run the recognition model
+                tickmeter.start()
+                result = recognizer.forward()
+                tickmeter.stop()
+
+                # decode the result into text
+                wordRecognized = decodeText(result)
+                cv.putText(frame, wordRecognized, (int(vertices[1][0]), int(vertices[1][1])), cv.FONT_HERSHEY_SIMPLEX,
+                           0.5, (255, 0, 0))
+
             for j in range(4):
                 p1 = (vertices[j][0], vertices[j][1])
                 p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1])
                 cv.line(frame, p1, p2, (0, 255, 0), 1)

         # Put efficiency information
+        label = 'Inference time: %.2f ms' % (tickmeter.getTimeMilli())
         cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

         # Display the frame
-        cv.imshow(kWinName,frame)
+        cv.imshow(kWinName, frame)
+        tickmeter.reset()


 if __name__ == "__main__":
     main()
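One detail worth spelling out from the recognition branch above: blobFromImage computes scalefactor * (pixel - mean), so mean=127.5 with scalefactor=1/127.5 maps the grayscale crop from [0, 255] to [-1, 1], the range the CRNN expects. A standalone check with a synthetic one-row image (illustrative, not part of the patch):

    import numpy as np
    import cv2 as cv

    gray = np.array([[0, 127.5, 255]], dtype=np.float32)   # a synthetic 1x3 "image"
    blob = cv.dnn.blobFromImage(gray, scalefactor=1 / 127.5, size=(3, 1), mean=127.5)
    print(blob.ravel())   # -> approximately [-1.  0.  1.]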
