Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Branch: pull/17231/head
Author: Alexander Alekhin
Commit: 09799402f9
10 changed files:
  1. modules/calib3d/test/test_fisheye.cpp (2 changes)
  2. modules/dnn/src/darknet/darknet_io.cpp (18 changes)
  3. modules/dnn/src/layers/region_layer.cpp (17 changes)
  4. modules/dnn/src/layers/slice_layer.cpp (59 changes)
  5. modules/dnn/test/test_darknet_importer.cpp (5 changes)
  6. modules/dnn/test/test_layers.cpp (34 changes)
  7. modules/dnn/test/test_onnx_importer.cpp (6 changes)
  8. modules/video/src/lkpyramid.cpp (10 changes)
  9. modules/video/src/lkpyramid.hpp (4 changes)
  10. samples/dnn/text_detection.cpp (141 changes)

@@ -101,7 +101,7 @@ TEST_F(fisheyeTest, projectPoints)
EXPECT_MAT_NEAR(distorted0, distorted2, 1e-10);
}
-TEST_F(fisheyeTest, DISABLED_undistortImage)
+TEST_F(fisheyeTest, undistortImage)
{
cv::Matx33d theK = this->K;
cv::Mat theD = cv::Mat(this->D);

@@ -229,6 +229,10 @@ namespace cv {
{
activation_param.type = "Swish";
}
+else if (type == "mish")
+{
+activation_param.type = "Mish";
+}
else if (type == "logistic")
{
activation_param.type = "Sigmoid";
@@ -436,7 +440,7 @@ namespace cv {
fused_layer_names.push_back(last_layer);
}
-void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold)
+void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y)
{
cv::dnn::LayerParams region_param;
region_param.name = "Region-name";
@@ -449,6 +453,7 @@ namespace cv {
region_param.set<bool>("logistic", true);
region_param.set<float>("thresh", thresh);
region_param.set<float>("nms_threshold", nms_threshold);
+region_param.set<float>("scale_x_y", scale_x_y);
std::vector<float> usedAnchors(numAnchors * 2);
for (int i = 0; i < numAnchors; ++i)
@@ -786,6 +791,7 @@ namespace cv {
int num_of_anchors = getParam<int>(layer_params, "num", -1);
float thresh = getParam<float>(layer_params, "thresh", 0.2);
float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.4);
+float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
CV_Assert(!anchors_values.empty());
@@ -798,7 +804,7 @@ namespace cv {
CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
setParams.setPermute(false);
-setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold);
+setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y);
}
else {
CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
@@ -813,6 +819,10 @@ namespace cv {
{
setParams.setActivation("swish");
}
+else if (activation == "mish")
+{
+setParams.setActivation("mish");
+}
else if (activation == "logistic")
{
setParams.setActivation("logistic");
@@ -935,8 +945,8 @@ namespace cv {
}
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
-if(activation == "leaky" || activation == "swish" || activation == "logistic")
-++cv_layers_counter; // For ReLU, Swish, Sigmoid
+if(activation == "leaky" || activation == "swish" || activation == "mish" || activation == "logistic")
+++cv_layers_counter; // For ReLU, Swish, Mish, Sigmoid
if(!darknet_layers_counter)
tensor_shape.resize(1);
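
For reference, the "mish" activation wired up above is mish(x) = x * tanh(softplus(x)) (https://arxiv.org/abs/1908.08681). A minimal standalone sketch of the formula, not the fused kernel OpenCV dispatches to internally:

    #include <cmath>

    // Mish: x * tanh(softplus(x)), with softplus(x) = log(1 + exp(x)).
    // Computed naively; a production kernel would guard against exp() overflow.
    static float mish(float x)
    {
        return x * std::tanh(std::log1p(std::exp(x)));
    }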

@@ -69,7 +69,7 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
{
public:
int coords, classes, anchors, classfix;
-float thresh, nmsThreshold;
+float thresh, nmsThreshold, scale_x_y;
bool useSoftmax, useLogistic;
#ifdef HAVE_OPENCL
UMat blob_umat;
@@ -88,6 +88,7 @@ public:
useSoftmax = params.get<bool>("softmax", false);
useLogistic = params.get<bool>("logistic", false);
nmsThreshold = params.get<float>("nms_threshold", 0.4);
+scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
CV_Assert(nmsThreshold >= 0.);
CV_Assert(coords == 4);
@@ -302,8 +303,10 @@ public:
if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
int box_index = index_sample_offset + index * cell_size;
-dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
-dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
+float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
+float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
+dstData[box_index + 0] = (x + x_tmp) / cols;
+dstData[box_index + 1] = (y + y_tmp) / rows;
dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
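
In effect, scale_x_y stretches the sigmoid output around 0.5 before the cell offset is added, letting predicted centers reach the cell borders (a YOLOv4 change; the default scale_x_y = 1.0 reproduces the old formula exactly). A small self-contained sketch of the same arithmetic, with made-up values:

    #include <cmath>
    #include <cstdio>

    static float logistic_activate(float v) { return 1.f / (1.f + std::exp(-v)); }

    int main()
    {
        float tx = 2.0f;          // hypothetical raw network output for the x offset
        int   x = 3, cols = 13;   // grid cell column and grid width
        float scale_x_y = 1.05f;  // typical YOLOv4 value; 1.0 gives the old behavior

        float x_tmp = (logistic_activate(tx) - 0.5f) * scale_x_y + 0.5f;
        std::printf("bx = %f\n", (x + x_tmp) / cols);  // normalized box center x
        return 0;
    }
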
@@ -471,6 +474,8 @@ public:
auto shape_3d = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{boxes_shape.size()}, boxes_shape.data());
ngraph::Shape box_broad_shape{1, (size_t)anchors, (size_t)h, (size_t)w};
+auto scale_x_y_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &scale_x_y);
+auto shift_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{0.5});
std::shared_ptr<ngraph::Node> box_x;
{
@@ -478,6 +483,9 @@ public:
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, cols});
box_x = std::make_shared<ngraph::op::v1::StridedSlice>(input2d, lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
box_x = std::make_shared<ngraph::op::Sigmoid>(box_x);
+box_x = std::make_shared<ngraph::op::v1::Subtract>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
+box_x = std::make_shared<ngraph::op::v1::Multiply>(box_x, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
+box_x = std::make_shared<ngraph::op::v1::Add>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, shape_3d, true);
std::vector<float> x_indices(w * h * anchors);
@@ -504,6 +512,9 @@ public:
auto upper_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{2, cols});
box_y = std::make_shared<ngraph::op::v1::StridedSlice>(input2d, lower_bounds, upper_bounds, strides, std::vector<int64_t>{}, std::vector<int64_t>{});
box_y = std::make_shared<ngraph::op::Sigmoid>(box_y);
+box_y = std::make_shared<ngraph::op::v1::Subtract>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
+box_y = std::make_shared<ngraph::op::v1::Multiply>(box_y, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
+box_y = std::make_shared<ngraph::op::v1::Add>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, shape_3d, true);
std::vector<float> y_indices(h * anchors);

@@ -172,18 +172,19 @@ public:
CV_Assert(inputs.size() == 1);
const MatSize& inpShape = inputs[0].size;
+finalSliceRanges = sliceRanges;
if (sliceRanges.empty())
{
// Divide input blob on equal parts by axis.
int outAxisSize = inpShape[axis] / outputs.size();
-sliceRanges.resize(outputs.size(),
-std::vector<Range>(axis + 1, Range::all()));
+finalSliceRanges.resize(outputs.size(),
+std::vector<Range>(axis + 1, Range::all()));
int prevSlice = 0;
for (int i = 0; i < outputs.size(); ++i)
{
-sliceRanges[i][axis].start = prevSlice;
-sliceRanges[i][axis].end = sliceRanges[i][axis].start + outAxisSize;
-prevSlice = sliceRanges[i][axis].end;
+finalSliceRanges[i][axis].start = prevSlice;
+finalSliceRanges[i][axis].end = finalSliceRanges[i][axis].start + outAxisSize;
+prevSlice = finalSliceRanges[i][axis].end;
}
}
else
@@ -191,16 +192,16 @@ public:
for (int i = 0; i < outputs.size(); ++i)
{
-CV_Assert(sliceRanges[i].size() <= inpShape.dims());
+CV_Assert(finalSliceRanges[i].size() <= inpShape.dims());
// Fill the rest of ranges.
-for (int j = sliceRanges[i].size(); j < inpShape.dims(); ++j)
+for (int j = finalSliceRanges[i].size(); j < inpShape.dims(); ++j)
{
-sliceRanges[i].push_back(Range::all());
+finalSliceRanges[i].push_back(Range::all());
}
// Clamp.
-for (int j = 0; j < sliceRanges[i].size(); ++j)
+for (int j = 0; j < finalSliceRanges[i].size(); ++j)
{
-sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
+finalSliceRanges[i][j] = clamp(finalSliceRanges[i][j], inpShape[j]);
}
}
}
@@ -241,8 +242,8 @@ public:
kernel.set(idx++, (int)(rows * cols));
kernel.set(idx++, (int)inpMat.size[3]);
kernel.set(idx++, (int)cols);
-kernel.set(idx++, (int)sliceRanges[i][2].start);
-kernel.set(idx++, (int)sliceRanges[i][3].start);
+kernel.set(idx++, (int)finalSliceRanges[i][2].start);
+kernel.set(idx++, (int)finalSliceRanges[i][3].start);
kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i]));
bool ret = kernel.run(1, global, local, false);
if (!ret)
@@ -266,10 +267,10 @@ public:
outputs_arr.getMatVector(outputs);
const Mat& inpMat = inputs[0];
-CV_Assert(outputs.size() == sliceRanges.size());
+CV_Assert(outputs.size() == finalSliceRanges.size());
for (size_t i = 0; i < outputs.size(); i++)
{
-inpMat(sliceRanges[i]).copyTo(outputs[i]);
+inpMat(finalSliceRanges[i]).copyTo(outputs[i]);
}
}
@@ -278,11 +279,11 @@ public:
#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1)
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
{
-CV_Assert_N(sliceRanges.size() == 1, inputs.size() <= 2);
+CV_Assert_N(finalSliceRanges.size() == 1, inputs.size() <= 2);
std::vector<size_t> axes, offsets, dims;
int from, to, step;
-int numDims = sliceRanges[0].size();
+int numDims = finalSliceRanges[0].size();
if (preferableTarget == DNN_TARGET_MYRIAD)
{
from = axis;
@@ -298,8 +299,8 @@ public:
for (int i = from; i != to; i += step)
{
axes.push_back(i);
-offsets.push_back(sliceRanges[0][i].start);
-dims.push_back(sliceRanges[0][i].size());
+offsets.push_back(finalSliceRanges[0][i].start);
+dims.push_back(finalSliceRanges[0][i].size());
}
InferenceEngine::Builder::Layer ieLayer(name);
@@ -315,7 +316,7 @@ public:
{
std::vector<size_t> outShape(numDims);
for (int i = 0; i < numDims; ++i)
-outShape[i] = sliceRanges[0][i].size();
+outShape[i] = finalSliceRanges[0][i].size();
ieLayer.getInputPorts()[1].setParameter("type", "weights");
@@ -338,13 +339,13 @@ public:
{
CV_Assert_N(nodes.size() <= 2);
auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
-CV_Assert(sliceRanges[0].size() == ieInpNode->get_shape().size());
+CV_Assert(finalSliceRanges[0].size() == ieInpNode->get_shape().size());
std::vector<int64_t> offsets, dims;
-for (int i = 0; i < sliceRanges[0].size(); ++i)
+for (int i = 0; i < finalSliceRanges[0].size(); ++i)
{
-offsets.push_back(sliceRanges[0][i].start);
-dims.push_back(sliceRanges[0][i].end);
+offsets.push_back(finalSliceRanges[0][i].start);
+dims.push_back(finalSliceRanges[0][i].end);
}
auto lower_bounds = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
@@ -384,6 +385,10 @@ public:
}
#endif
protected:
+// The actual non-negative values determined from @p sliceRanges depends on input size.
+std::vector<std::vector<Range> > finalSliceRanges;
};
class CropLayerImpl CV_FINAL : public SliceLayerImpl
@@ -447,18 +452,18 @@ public:
offset_final[i] = offset[i - start_axis];
}
-sliceRanges.resize(1);
-sliceRanges[0].resize(dims);
+finalSliceRanges.resize(1);
+finalSliceRanges[0].resize(dims);
for (int i = 0; i < start_axis; i++)
{
-sliceRanges[0][i] = Range(0, inpBlob.size[i]);
+finalSliceRanges[0][i] = Range(0, inpBlob.size[i]);
}
for (int i = start_axis; i < dims; i++)
{
if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size[i] > inpBlob.size[i])
CV_Error(Error::StsBadArg, "invalid crop parameters or blob sizes");
-sliceRanges[0][i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size[i]);
+finalSliceRanges[0][i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size[i]);
}
}
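
The idea behind the rename: sliceRanges keeps the ranges exactly as specified by the model (possibly open-ended), while finalSliceRanges holds the ranges resolved against the current input shape. Previously the resolved values were written back into sliceRanges, so a second finalize() with a different input size started from already-clamped ranges and produced wrong slices. A minimal sketch of the declared-versus-resolved pattern, with hypothetical names:

    #include <vector>

    // Hypothetical illustration of the split used above.
    struct SliceConfig
    {
        std::vector<int> declared;  // from the model; may hold -1 meaning "to the end"
        std::vector<int> resolved;  // recomputed from `declared` for every input size

        void finalize(int inputSize)
        {
            resolved = declared;                  // always start from the pristine config
            for (int& v : resolved)
                if (v < 0) v = inputSize + v + 1; // resolve -1 against the current shape
        }
    };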

@@ -549,6 +549,11 @@ TEST_P(Test_Darknet_layers, upsample)
testDarknetLayer("upsample");
}
+TEST_P(Test_Darknet_layers, mish)
+{
+testDarknetLayer("mish", true);
+}
TEST_P(Test_Darknet_layers, avgpool_softmax)
{
testDarknetLayer("avgpool_softmax");

@@ -1791,4 +1791,38 @@ TEST_P(Layer_Test_Resize, change_input)
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Resize, dnnBackendsAndTargets());
+typedef testing::TestWithParam<tuple<Backend, Target> > Layer_Test_Slice;
+TEST_P(Layer_Test_Slice, variable_input_shape)
+{
+int backendId = get<0>(GetParam());
+int targetId = get<1>(GetParam());
+int begin[] = {0, 0, 0, 0};
+int end[] = {-1, -1, -1, -1};
+Net net;
+LayerParams lp;
+lp.type = "Slice";
+lp.name = "testLayer";
+lp.set("begin", DictValue::arrayInt<int*>(&begin[0], 4));
+lp.set("end", DictValue::arrayInt<int*>(&end[0], 4));
+net.addLayerToPrev(lp.name, lp.type, lp);
+for (int i = 0; i < 2; ++i)
+{
+Mat inp(4 + i, 5 + i, CV_8UC1);
+randu(inp, 0, 255);
+inp = blobFromImage(inp);
+net.setInput(inp);
+net.setPreferableBackend(backendId);
+net.setPreferableTarget(targetId);
+Mat out = net.forward();
+normAssert(out, inp);
+}
+}
+INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Slice, dnnBackendsAndTargets());
}} // namespace

@@ -559,7 +559,7 @@ public:
TEST_P(Test_ONNX_nets, Alexnet)
{
-#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL)
+#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32))
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
#else
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
@@ -623,7 +623,7 @@ TEST_P(Test_ONNX_nets, Googlenet)
TEST_P(Test_ONNX_nets, CaffeNet)
{
-#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL)
+#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32))
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
#else
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
@@ -639,7 +639,7 @@ TEST_P(Test_ONNX_nets, CaffeNet)
TEST_P(Test_ONNX_nets, RCNN_ILSVRC13)
{
-#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL)
+#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32))
applyTestTag(CV_TEST_TAG_MEMORY_2GB);
#else
applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);

@@ -55,19 +55,19 @@
namespace
{
-static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
+static void calcScharrDeriv(const cv::Mat& src, cv::Mat& dst)
{
using namespace cv;
using cv::detail::deriv_type;
int rows = src.rows, cols = src.cols, cn = src.channels(), depth = src.depth();
CV_Assert(depth == CV_8U);
dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
-parallel_for_(Range(0, rows), cv::detail::SharrDerivInvoker(src, dst), cv::getNumThreads());
+parallel_for_(Range(0, rows), cv::detail::ScharrDerivInvoker(src, dst), cv::getNumThreads());
}
}//namespace
-void cv::detail::SharrDerivInvoker::operator()(const Range& range) const
+void cv::detail::ScharrDerivInvoker::operator()(const Range& range) const
{
using cv::detail::deriv_type;
int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn;
@@ -801,7 +801,7 @@ int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Si
deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);
Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
-calcSharrDeriv(thisLevel, derivI);
+calcScharrDeriv(thisLevel, derivI);
if(derivBorder != BORDER_TRANSPARENT)
copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
@@ -1382,7 +1382,7 @@ void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg,
Mat _derivI( imgSize.height + winSize.height*2,
imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.ptr() );
derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
-calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
+calcScharrDeriv(prevPyr[level * lvlStep1], derivI);
copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
}
else

@@ -7,9 +7,9 @@ namespace detail
typedef short deriv_type;
-struct SharrDerivInvoker : ParallelLoopBody
+struct ScharrDerivInvoker : ParallelLoopBody
{
-SharrDerivInvoker(const Mat& _src, const Mat& _dst)
+ScharrDerivInvoker(const Mat& _src, const Mat& _dst)
: src(_src), dst(_dst)
{ }
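
The rename across these two files fixes a long-standing typo: the 3x3 derivative filter used by the LK pyramid is Scharr's operator (kernel weights 3, 10, 3), not "Sharr". The same derivative pair is available through the public API; a rough equivalent using cv::Scharr (the internal calcScharrDeriv additionally packs dx and dy into one interleaved 2-channel matrix):

    #include <opencv2/imgproc.hpp>

    // Compute 16-bit Scharr derivatives of an 8-bit image, one per direction.
    void scharrPair(const cv::Mat& src, cv::Mat& dx, cv::Mat& dy)
    {
        cv::Scharr(src, dx, CV_16S, 1, 0); // d/dx
        cv::Scharr(src, dy, CV_16S, 0, 1); // d/dy
    }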

@@ -1,3 +1,20 @@
+/*
+Text detection model: https://github.com/argman/EAST
+Download link: https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1
+Text recognition model taken from here: https://github.com/meijieru/crnn.pytorch
+How to convert from pb to onnx:
+Using classes from here: https://github.com/meijieru/crnn.pytorch/blob/master/models/crnn.py
+import torch
+import models.crnn as crnn
+model = crnn.CRNN(32, 1, 37, 256)
+model.load_state_dict(torch.load('crnn.pth'))
+dummy_input = torch.randn(1, 1, 32, 100)
+torch.onnx.export(model, dummy_input, "crnn.onnx", verbose=True)
+*/
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
@@ -8,21 +25,26 @@ using namespace cv::dnn;
const char* keys =
"{ help h | | Print help message. }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
-"{ model m | | Path to a binary .pb file contains trained network.}"
+"{ model m | | Path to a binary .pb file contains trained detector network.}"
+"{ ocr | | Path to a binary .pb or .onnx file contains trained recognition network.}"
"{ width | 320 | Preprocess input image by resizing to a specific width. It should be multiple by 32. }"
"{ height | 320 | Preprocess input image by resizing to a specific height. It should be multiple by 32. }"
"{ thr | 0.5 | Confidence threshold. }"
"{ nms | 0.4 | Non-maximum suppression threshold. }";
-void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
-std::vector<RotatedRect>& detections, std::vector<float>& confidences);
+void decodeBoundingBoxes(const Mat& scores, const Mat& geometry, float scoreThresh,
+std::vector<RotatedRect>& detections, std::vector<float>& confidences);
+void fourPointsTransform(const Mat& frame, Point2f vertices[4], Mat& result);
+void decodeText(const Mat& scores, std::string& text);
int main(int argc, char** argv)
{
// Parse command line arguments.
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of "
"EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
@@ -33,7 +55,8 @@ int main(int argc, char** argv)
float nmsThreshold = parser.get<float>("nms");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
-String model = parser.get<String>("model");
+String modelDecoder = parser.get<String>("model");
+String modelRecognition = parser.get<String>("ocr");
if (!parser.check())
{
@@ -41,17 +64,19 @@ int main(int argc, char** argv)
return 1;
}
-CV_Assert(!model.empty());
+CV_Assert(!modelDecoder.empty());
-// Load network.
-Net net = readNet(model);
+// Load networks.
+Net detector = readNet(modelDecoder);
+Net recognizer;
+if (!modelRecognition.empty())
+recognizer = readNet(modelRecognition);
// Open a video file or an image file or a camera stream.
VideoCapture cap;
-if (parser.has("input"))
-cap.open(parser.get<String>("input"));
-else
-cap.open(0);
+bool openSuccess = parser.has("input") ? cap.open(parser.get<String>("input")) : cap.open(0);
+CV_Assert(openSuccess);
static const std::string kWinName = "EAST: An Efficient and Accurate Scene Text Detector";
namedWindow(kWinName, WINDOW_NORMAL);
@@ -62,6 +87,7 @@ int main(int argc, char** argv)
outNames[1] = "feature_fusion/concat_3";
Mat frame, blob;
+TickMeter tickMeter;
while (waitKey(1) < 0)
{
cap >> frame;
@@ -72,8 +98,10 @@ int main(int argc, char** argv)
}
blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false);
-net.setInput(blob);
-net.forward(outs, outNames);
+detector.setInput(blob);
+tickMeter.start();
+detector.forward(outs, outNames);
+tickMeter.stop();
Mat scores = outs[0];
Mat geometry = outs[1];
@@ -81,43 +109,64 @@ int main(int argc, char** argv)
// Decode predicted bounding boxes.
std::vector<RotatedRect> boxes;
std::vector<float> confidences;
-decode(scores, geometry, confThreshold, boxes, confidences);
+decodeBoundingBoxes(scores, geometry, confThreshold, boxes, confidences);
// Apply non-maximum suppression procedure.
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
-// Render detections.
+// Render text.
Point2f ratio((float)frame.cols / inpWidth, (float)frame.rows / inpHeight);
for (size_t i = 0; i < indices.size(); ++i)
{
RotatedRect& box = boxes[indices[i]];
Point2f vertices[4];
box.points(vertices);
for (int j = 0; j < 4; ++j)
{
vertices[j].x *= ratio.x;
vertices[j].y *= ratio.y;
}
+if (!modelRecognition.empty())
+{
+Mat cropped;
+fourPointsTransform(frame, vertices, cropped);
+cvtColor(cropped, cropped, cv::COLOR_BGR2GRAY);
+Mat blobCrop = blobFromImage(cropped, 1.0/127.5, Size(), Scalar::all(127.5));
+recognizer.setInput(blobCrop);
+tickMeter.start();
+Mat result = recognizer.forward();
+tickMeter.stop();
+std::string wordRecognized = "";
+decodeText(result, wordRecognized);
+putText(frame, wordRecognized, vertices[1], FONT_HERSHEY_SIMPLEX, 1.5, Scalar(0, 0, 255));
+}
for (int j = 0; j < 4; ++j)
line(frame, vertices[j], vertices[(j + 1) % 4], Scalar(0, 255, 0), 1);
}
// Put efficiency information.
-std::vector<double> layersTimes;
-double freq = getTickFrequency() / 1000;
-double t = net.getPerfProfile(layersTimes) / freq;
-std::string label = format("Inference time: %.2f ms", t);
+std::string label = format("Inference time: %.2f ms", tickMeter.getTimeMilli());
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
+tickMeter.reset();
}
return 0;
}
-void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
-std::vector<RotatedRect>& detections, std::vector<float>& confidences)
+void decodeBoundingBoxes(const Mat& scores, const Mat& geometry, float scoreThresh,
+std::vector<RotatedRect>& detections, std::vector<float>& confidences)
{
detections.clear();
CV_Assert(scores.dims == 4); CV_Assert(geometry.dims == 4); CV_Assert(scores.size[0] == 1);
@@ -159,3 +208,51 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh,
}
}
}
+void fourPointsTransform(const Mat& frame, Point2f vertices[4], Mat& result)
+{
+const Size outputSize = Size(100, 32);
+Point2f targetVertices[4] = {Point(0, outputSize.height - 1),
+Point(0, 0), Point(outputSize.width - 1, 0),
+Point(outputSize.width - 1, outputSize.height - 1),
+};
+Mat rotationMatrix = getPerspectiveTransform(vertices, targetVertices);
+warpPerspective(frame, result, rotationMatrix, outputSize);
+}
+void decodeText(const Mat& scores, std::string& text)
+{
+static const std::string alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
+Mat scoresMat = scores.reshape(1, scores.size[0]);
+std::vector<char> elements;
+elements.reserve(scores.size[0]);
+for (int rowIndex = 0; rowIndex < scoresMat.rows; ++rowIndex)
+{
+Point p;
+minMaxLoc(scoresMat.row(rowIndex), 0, 0, 0, &p);
+if (p.x > 0 && static_cast<size_t>(p.x) <= alphabet.size())
+{
+elements.push_back(alphabet[p.x - 1]);
+}
+else
+{
+elements.push_back('-');
+}
+}
+if (elements.size() > 0 && elements[0] != '-')
+text += elements[0];
+for (size_t elementIndex = 1; elementIndex < elements.size(); ++elementIndex)
+{
+if (elementIndex > 0 && elements[elementIndex] != '-' &&
+elements[elementIndex - 1] != elements[elementIndex])
+{
+text += elements[elementIndex];
+}
+}
+}
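
decodeText above is a greedy (best-path) CTC decoder: take the argmax of the class scores at each time step, treat index 0 ('-') as the CTC blank, drop blanks, and collapse consecutive repeats. A self-contained sketch of the same collapse rule, operating on precomputed argmax indices (toy input; alphabet as above):

    #include <iostream>
    #include <string>
    #include <vector>

    // Greedy CTC collapse: index 0 is the blank; index i > 0 maps to alphabet[i - 1].
    std::string ctcGreedyDecode(const std::vector<int>& argmaxPerStep,
                                const std::string& alphabet)
    {
        std::string text;
        int prev = 0; // previous index, starts as blank
        for (int idx : argmaxPerStep)
        {
            if (idx != 0 && idx != prev)   // skip blanks and repeated symbols
                text += alphabet[idx - 1];
            prev = idx;
        }
        return text;
    }

    int main()
    {
        // h, h, blank, e, l, l, blank, l, o  ->  "hello"
        std::vector<int> steps = {18, 18, 0, 15, 22, 22, 0, 22, 25};
        std::cout << ctcGreedyDecode(steps, "0123456789abcdefghijklmnopqrstuvwxyz") << "\n";
        return 0;
    }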