Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/17539/head
Alexander Alekhin 5 years ago
commit 0cbaaba4b1
8 changed files:
  1. cmake/OpenCVDetectInferenceEngine.cmake (4 changed lines)
  2. cmake/OpenCVFindMKL.cmake (20 changed lines)
  3. doc/tutorials/introduction/macos_install/macos_install.markdown (8 changed lines)
  4. modules/core/include/opencv2/core/eigen.hpp (12 changed lines)
  5. modules/dnn/src/dnn.cpp (2 changed lines)
  6. modules/dnn/src/ie_ngraph.cpp (8 changed lines)
  7. modules/dnn/src/op_inf_engine.hpp (7 changed lines)
  8. samples/dnn/text_detection.py (129 changed lines)

cmake/OpenCVDetectInferenceEngine.cmake

@@ -129,9 +129,9 @@ endif()
 if(INF_ENGINE_TARGET)
   if(NOT INF_ENGINE_RELEASE)
-    message(WARNING "InferenceEngine version has not been set, 2020.2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
+    message(WARNING "InferenceEngine version has not been set, 2020.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
   endif()
-  set(INF_ENGINE_RELEASE "2020020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
+  set(INF_ENGINE_RELEASE "2020030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
   set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
     INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
   )
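
Note: the cache comment above defines the packed YYYYAABBCC version format. A minimal Python sketch of that arithmetic (illustrative only, not part of the build):

    def encode_ie_release(year, update, patch, revision):
        # pack version YYYY.AA.BB.CC into the integer YYYYAABBCC
        return year * 1000000 + update * 10000 + patch * 100 + revision

    assert encode_ie_release(2020, 1, 0, 2) == 2020010002  # the example in the comment
    assert encode_ie_release(2020, 3, 0, 0) == 2020030000  # the new default above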

cmake/OpenCVFindMKL.cmake

@@ -79,9 +79,10 @@ get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
 #determine arch
 if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
     set(MKL_X64 1)
-    set(MKL_ARCH "intel64")
+    set(MKL_ARCH_LIST "intel64")
+    if(MSVC)
+        list(APPEND MKL_ARCH_LIST "win-x64")
+    endif()
     include(CheckTypeSize)
     CHECK_TYPE_SIZE(int _sizeof_int)
     if (_sizeof_int EQUAL 4)
@@ -90,14 +91,19 @@ if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
         set(MKL_ARCH_SUFFIX "ilp64")
     endif()
 else()
-    set(MKL_ARCH "ia32")
+    set(MKL_ARCH_LIST "ia32")
     set(MKL_ARCH_SUFFIX "c")
 endif()
 if(MKL_VERSION_STR VERSION_GREATER "11.3.0" OR MKL_VERSION_STR VERSION_EQUAL "11.3.0")
     set(mkl_lib_find_paths
-        ${MKL_ROOT_DIR}/lib
-        ${MKL_ROOT_DIR}/lib/${MKL_ARCH} ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH})
+        ${MKL_ROOT_DIR}/lib)
+    foreach(MKL_ARCH ${MKL_ARCH_LIST})
+        list(APPEND mkl_lib_find_paths
+            ${MKL_ROOT_DIR}/lib/${MKL_ARCH}
+            ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}
+            ${MKL_ROOT_DIR}/${MKL_ARCH})
+    endforeach()
     set(mkl_lib_list "mkl_intel_${MKL_ARCH_SUFFIX}")
@@ -121,7 +127,7 @@ endif()
 set(MKL_LIBRARIES "")
 foreach(lib ${mkl_lib_list})
-    find_library(${lib} ${lib} ${mkl_lib_find_paths})
+    find_library(${lib} NAMES ${lib} ${lib}_dll HINTS ${mkl_lib_find_paths})
     mark_as_advanced(${lib})
     if(NOT ${lib})
         mkl_fail()
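
Two things change here: find_library() now also tries the ${lib}_dll import-library name, and the search paths are expanded per architecture by the foreach() above instead of using a single MKL_ARCH. A small, hypothetical Python sketch of the resulting path list (root and arch list are placeholders):

    mkl_root = "/opt/intel/mkl"
    arch_list = ["intel64", "win-x64"]            # MKL_ARCH_LIST on a 64-bit MSVC build

    paths = [mkl_root + "/lib"]
    for arch in arch_list:                        # mirrors the foreach() loop above
        paths += [mkl_root + "/lib/" + arch,
                  mkl_root + "/../tbb/lib/" + arch,
                  mkl_root + "/" + arch]
    print(paths)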

doc/tutorials/introduction/macos_install/macos_install.markdown

@@ -30,7 +30,7 @@ Installing CMake
 -# Install the dmg package and launch it from Applications. That will give you the UI app of CMake
--# From the CMake app window, choose menu Tools --> Install For Command Line Use.
+-# From the CMake app window, choose menu Tools --> How to Install For Command Line Use. Then, follow the instructions from the pop-up there.
 -# Install folder will be /usr/bin/ by default, submit it by choosing Install command line links.
@@ -66,7 +66,7 @@ git clone https://github.com/opencv/opencv_contrib.git
 Building OpenCV from Source Using CMake
 ---------------------------------------
--# Create a temporary directory, which we denote as `<cmake_build_dir>`, where you want to put
+-# Create a temporary directory, which we denote as `build_opencv`, where you want to put
    the generated Makefiles, project files as well the object files and output binaries and enter
    there.
@@ -87,8 +87,8 @@ Building OpenCV from Source Using CMake
     or cmake-gui

-    - set full path to OpenCV source code, e.g. `/home/user/opencv`
-    - set full path to `<cmake_build_dir>`, e.g. `/home/user/build_opencv`
+    - set the OpenCV source code path, e.g. `/home/user/opencv`
+    - set the binary build path to your CMake build directory, e.g. `/home/user/build_opencv`
     - set optional parameters
     - run: "Configure"
     - run: "Generate"

modules/core/include/opencv2/core/eigen.hpp

@@ -66,10 +66,18 @@
 namespace cv
 {
-//! @addtogroup core_eigen
+/** @addtogroup core_eigen
+These functions are provided for OpenCV-Eigen interoperability. They convert `Mat`
+objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen
+documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for
+information about the `Matrix` template type.
+@note Using these functions requires the `Eigen/Dense` or similar header to be
+included before this header.
+*/
 //! @{
-#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
+#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN)
 /** @brief Converts an Eigen::Tensor to a cv::Mat.
 The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:

modules/dnn/src/dnn.cpp

@@ -2248,7 +2248,7 @@ struct Net::Impl : public detail::NetImplBase
             auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
             CV_Assert(oid < ieInpNode->node->get_output_size());
-#if INF_ENGINE_VER_MAJOR_GT(2020030000)
+#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
             inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
 #else
             inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));

modules/dnn/src/ie_ngraph.cpp

@@ -82,7 +82,7 @@ public:
         return type_info;
     }
-#if INF_ENGINE_VER_MAJOR_GT(2020020000)
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3)
     NgraphCustomOp(const ngraph::OutputVector& inputs,
 #else
     NgraphCustomOp(const ngraph::NodeVector& inputs,
@@ -112,7 +112,7 @@ public:
     std::shared_ptr<ngraph::Node> copy_with_new_args(const ngraph::NodeVector& new_args) const override
     {
-#if INF_ENGINE_VER_MAJOR_GT(2020020000)
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3)
         return std::make_shared<NgraphCustomOp>(ngraph::as_output_vector(new_args), params);
 #else
         return std::make_shared<NgraphCustomOp>(new_args, params);
@@ -239,7 +239,9 @@ private:
 class InfEngineNgraphExtension : public InferenceEngine::IExtension
 {
 public:
+#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
     virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {}
+#endif
     virtual void Unload() noexcept {}
     virtual void Release() noexcept {}
     virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {}
@@ -283,7 +285,7 @@ InfEngineNgraphNode::InfEngineNgraphNode(const std::vector<Ptr<BackendNode> >& n
         {"internals", shapesToStr(internals)}
     };
-#if INF_ENGINE_VER_MAJOR_GT(2020020000)
+#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_3)
     ngraph::OutputVector inp_nodes;
 #else
     ngraph::NodeVector inp_nodes;

modules/dnn/src/op_inf_engine.hpp

@@ -25,10 +25,11 @@
 #define INF_ENGINE_RELEASE_2019R3 2019030000
 #define INF_ENGINE_RELEASE_2020_1 2020010000
 #define INF_ENGINE_RELEASE_2020_2 2020020000
+#define INF_ENGINE_RELEASE_2020_3 2020030000
 #ifndef INF_ENGINE_RELEASE
-#warning("IE version have not been provided via command-line. Using 2020.2 by default")
-#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_2
+#warning("IE version have not been provided via command-line. Using 2020.3 by default")
+#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_3
 #endif
 #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
@@ -226,7 +227,9 @@ private:
 class InfEngineExtension : public InferenceEngine::IExtension
 {
 public:
+#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2)
     virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {}
+#endif
     virtual void Unload() noexcept {}
     virtual void Release() noexcept {}
     virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {}
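
Note that the GT(2020020000) to GE(INF_ENGINE_RELEASE_2020_3) rewrites in ie_ngraph.cpp are behavior-preserving: both macros compare only the YYYYAA prefix (the / 10000 truncation above), so "greater than 2020.2" and "at least 2020.3" accept exactly the same releases. A quick sketch of that arithmetic, assuming the definitions shown:

    def ver_major(v):
        return v // 10000                  # mirrors the (ver) / 10000 truncation

    R_2020_2, R_2020_3 = 2020020000, 2020030000
    for release in (2020020000, 2020030000, 2021010000):
        assert (ver_major(release) > ver_major(R_2020_2)) == \
               (ver_major(release) >= ver_major(R_2020_3))   # the two guards agree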

samples/dnn/text_detection.py

@@ -1,25 +1,81 @@
+'''
+    Text detection model: https://github.com/argman/EAST
+    Download link: https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1
+    Text recognition model taken from here: https://github.com/meijieru/crnn.pytorch
+    How to convert from pb to onnx:
+    Using classes from here: https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py
+    import torch
+    from models.crnn import CRNN
+    model = CRNN(32, 1, 37, 256)
+    model.load_state_dict(torch.load('crnn.pth'))
+    dummy_input = torch.randn(1, 1, 32, 100)
+    torch.onnx.export(model, dummy_input, "crnn.onnx", verbose=True)
+'''
 # Import required modules
+import numpy as np
 import cv2 as cv
 import math
 import argparse
 ############ Add argument parser for command line arguments ############
-parser = argparse.ArgumentParser(description='Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)')
-parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
-parser.add_argument('--model', required=True,
-                    help='Path to a binary .pb file of model contains trained weights.')
+parser = argparse.ArgumentParser(
+    description="Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of "
+                "EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2). "
+                "The OCR model can be obtained by converting the pretrained CRNN model to .onnx format from the github repository https://github.com/meijieru/crnn.pytorch")
+parser.add_argument('--input',
+                    help='Path to input image or video file. Skip this argument to capture frames from a camera.')
+parser.add_argument('--model', '-m', required=True,
+                    help='Path to a binary .pb file containing the trained detector network.')
+parser.add_argument('--ocr', default="crnn.onnx",
+                    help='Path to a binary .pb or .onnx file containing the trained recognition network.')
 parser.add_argument('--width', type=int, default=320,
                     help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
-parser.add_argument('--height',type=int, default=320,
+parser.add_argument('--height', type=int, default=320,
                     help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
-parser.add_argument('--thr',type=float, default=0.5,
+parser.add_argument('--thr', type=float, default=0.5,
                     help='Confidence threshold.')
-parser.add_argument('--nms',type=float, default=0.4,
+parser.add_argument('--nms', type=float, default=0.4,
                     help='Non-maximum suppression threshold.')
 args = parser.parse_args()
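
With both models in place (the detector .pb from the Dropbox link in the header comment, the recognizer exported to crnn.onnx as shown there), a typical invocation would be, e.g., `python text_detection.py --model frozen_east_text_detection.pb --ocr crnn.onnx --input image.jpg`; the file names here are illustrative, only `crnn.onnx` is an actual default.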
 ############ Utility functions ############
-def decode(scores, geometry, scoreThresh):
+def fourPointsTransform(frame, vertices):
+    vertices = np.asarray(vertices)
+    outputSize = (100, 32)
+    targetVertices = np.array([
+        [0, outputSize[1] - 1],
+        [0, 0],
+        [outputSize[0] - 1, 0],
+        [outputSize[0] - 1, outputSize[1] - 1]], dtype="float32")
+    rotationMatrix = cv.getPerspectiveTransform(vertices, targetVertices)
+    result = cv.warpPerspective(frame, rotationMatrix, outputSize)
+    return result
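
fourPointsTransform() warps one rotated detection quad into an upright 100x32 patch, the input geometry the CRNN recognizer expects. A minimal sketch of how it is fed (the box numbers are made up; cv.boxPoints() is what main() uses below):

    import numpy as np
    import cv2 as cv

    frame = np.zeros((240, 320, 3), dtype=np.uint8)   # placeholder image
    box = ((160, 120), (80, 24), 15.0)                # center, (w, h), angle in degrees
    vertices = cv.boxPoints(box)                      # 4x2 float32 corners
    patch = fourPointsTransform(frame, vertices)
    print(patch.shape)                                # (32, 100, 3)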
+def decodeText(scores):
+    text = ""
+    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
+    for i in range(scores.shape[0]):
+        c = np.argmax(scores[i][0])
+        if c != 0:
+            text += alphabet[c - 1]
+        else:
+            text += '-'
+    # adjacent same letters as well as background text must be removed to get the final output
+    char_list = []
+    for i in range(len(text)):
+        if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
+            char_list.append(text[i])
+    return ''.join(char_list)
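
decodeText() is a greedy CTC decode: per time step it takes the argmax over 37 classes (class 0 is the CTC blank, written as '-'), then collapses adjacent repeats and drops blanks. A toy example with fabricated scores of shape (T, 1, 37), the layout the CRNN head produces:

    import numpy as np

    scores = np.zeros((5, 1, 37), dtype=np.float32)
    for t, c in enumerate([11, 11, 0, 12, 0]):        # 11 -> 'a', 12 -> 'b', 0 -> blank
        scores[t, 0, c] = 1.0

    print(decodeText(scores))                         # "ab": repeats collapsed, blanks dropped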
+def decodeBoundingBoxes(scores, geometry, scoreThresh):
     detections = []
     confidences = []
@@ -47,7 +103,7 @@ def decode(scores, geometry, scoreThresh):
             score = scoresData[x]

             # If score is lower than threshold score, move to next x
-            if(score < scoreThresh):
+            if (score < scoreThresh):
                 continue

             # Calculate offset
@@ -66,24 +122,27 @@ def decode(scores, geometry, scoreThresh):
             # Find points for rectangle
             p1 = (-sinA * h + offset[0], -cosA * h + offset[1])
-            p3 = (-cosA * w + offset[0],  sinA * w + offset[1])
-            center = (0.5*(p1[0]+p3[0]), 0.5*(p1[1]+p3[1]))
-            detections.append((center, (w,h), -1*angle * 180.0 / math.pi))
+            p3 = (-cosA * w + offset[0], sinA * w + offset[1])
+            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))
+            detections.append((center, (w, h), -1 * angle * 180.0 / math.pi))
             confidences.append(float(score))

     # Return detections and confidences
     return [detections, confidences]

 def main():
     # Read and store arguments
     confThreshold = args.thr
     nmsThreshold = args.nms
     inpWidth = args.width
     inpHeight = args.height
-    model = args.model
+    modelDetector = args.model
+    modelRecognition = args.ocr

     # Load network
-    net = cv.dnn.readNet(model)
+    detector = cv.dnn.readNet(modelDetector)
+    recognizer = cv.dnn.readNet(modelRecognition)

     # Create a new named window
     kWinName = "EAST: An Efficient and Accurate Scene Text Detector"
@@ -95,6 +154,7 @@ def main():
     # Open a video file or an image file or a camera stream
     cap = cv.VideoCapture(args.input if args.input else 0)

+    tickmeter = cv.TickMeter()
     while cv.waitKey(1) < 0:
         # Read frame
         hasFrame, frame = cap.read()
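
cv.TickMeter accumulates time across start()/stop() pairs, which is why the sample can time the detector and recognizer passes separately within one frame and then reset() before the next. A tiny standalone sketch:

    import cv2 as cv

    tm = cv.TickMeter()
    tm.start(); sum(range(10 ** 6)); tm.stop()        # first timed section
    tm.start(); sum(range(10 ** 6)); tm.stop()        # accumulates into the same total
    print('%.2f ms' % tm.getTimeMilli())              # combined time of both sections
    tm.reset()                                        # start fresh, as done per frame below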
@@ -111,19 +171,20 @@ def main():
         # Create a 4D blob from frame.
         blob = cv.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)

-        # Run the model
-        net.setInput(blob)
-        outs = net.forward(outNames)
-        t, _ = net.getPerfProfile()
-        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
+        # Run the detection model
+        detector.setInput(blob)
+        tickmeter.start()
+        outs = detector.forward(outNames)
+        tickmeter.stop()

         # Get scores and geometry
         scores = outs[0]
         geometry = outs[1]
-        [boxes, confidences] = decode(scores, geometry, confThreshold)
+        [boxes, confidences] = decodeBoundingBoxes(scores, geometry, confThreshold)

         # Apply NMS
-        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold,nmsThreshold)
+        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
         for i in indices:
             # get 4 corners of the rotated rect
             vertices = cv.boxPoints(boxes[i[0]])
@@ -131,16 +192,40 @@ def main():
             for j in range(4):
                 vertices[j][0] *= rW
                 vertices[j][1] *= rH
+            # get cropped image using perspective transform
+            if modelRecognition:
+                cropped = fourPointsTransform(frame, vertices)
+                cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)
+                # Create a 4D blob from cropped image
+                blob = cv.dnn.blobFromImage(cropped, size=(100, 32), mean=127.5, scalefactor=1 / 127.5)
+                recognizer.setInput(blob)
+                # Run the recognition model
+                tickmeter.start()
+                result = recognizer.forward()
+                tickmeter.stop()
+                # decode the result into text
+                wordRecognized = decodeText(result)
+                cv.putText(frame, wordRecognized, (int(vertices[1][0]), int(vertices[1][1])), cv.FONT_HERSHEY_SIMPLEX,
+                           0.5, (255, 0, 0))
             for j in range(4):
                 p1 = (vertices[j][0], vertices[j][1])
                 p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1])
                 cv.line(frame, p1, p2, (0, 255, 0), 1)

         # Put efficiency information
+        label = 'Inference time: %.2f ms' % (tickmeter.getTimeMilli())
         cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

         # Display the frame
-        cv.imshow(kWinName,frame)
+        cv.imshow(kWinName, frame)
+        tickmeter.reset()

 if __name__ == "__main__":
     main()
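
For reference, the detection half of the updated loop can be exercised on a single image without the CLI; a hedged sketch using the functions above (the paths are placeholders, and the output layer names are the ones the full sample uses):

    import cv2 as cv

    detector = cv.dnn.readNet("frozen_east_text_detection.pb")
    frame = cv.imread("image.jpg")
    blob = cv.dnn.blobFromImage(frame, 1.0, (320, 320), (123.68, 116.78, 103.94), True, False)
    detector.setInput(blob)
    scores, geometry = detector.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])
    boxes, confidences = decodeBoundingBoxes(scores, geometry, 0.5)
    indices = cv.dnn.NMSBoxesRotated(boxes, confidences, 0.5, 0.4)
    print(len(indices), "text boxes kept")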
