From 8488f2e26524d6bcf476c882e8462792ee11d0ab Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 24 Apr 2018 18:25:43 +0300 Subject: [PATCH 1/2] EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2) --- .../dnn_custom_layers/dnn_custom_layers.md | 22 +-- modules/dnn/include/opencv2/dnn/dnn.hpp | 4 + modules/dnn/src/nms.cpp | 24 +++ .../src/tensorflow/tf_graph_simplifier.cpp | 32 ++++ modules/dnn/src/tensorflow/tf_importer.cpp | 20 +++ modules/dnn/test/test_tf_importer.cpp | 74 +++++++- .../custom_layers.hpp} | 141 +++++++++------ samples/dnn/text_detection.cpp | 169 ++++++++++++++++++ 8 files changed, 411 insertions(+), 75 deletions(-) rename samples/{cpp/tutorial_code/dnn/custom_layers.cpp => dnn/custom_layers.hpp} (81%) create mode 100644 samples/dnn/text_detection.cpp diff --git a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md index 5b3f3c7347..f367946620 100644 --- a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md +++ b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md @@ -32,11 +32,11 @@ Unspecified error: Can't create layer "layer_name" of type "MyType" in function To import the model correctly you have to derive a class from cv::dnn::Layer with the following methods: -@snippet dnn/custom_layers.cpp A custom layer interface +@snippet dnn/custom_layers.hpp A custom layer interface And register it before the import: -@snippet dnn/custom_layers.cpp Register a custom layer +@snippet dnn/custom_layers.hpp Register a custom layer @note `MyType` is a type of unimplemented layer from the thrown exception. @@ -44,27 +44,27 @@ Let's see what all the methods do: - Constructor -@snippet dnn/custom_layers.cpp MyLayer::MyLayer +@snippet dnn/custom_layers.hpp MyLayer::MyLayer Retrieves hyper-parameters from cv::dnn::LayerParams. If your layer has trainable weights they will be already stored in the Layer's member cv::dnn::Layer::blobs. - A static method `create` -@snippet dnn/custom_layers.cpp MyLayer::create +@snippet dnn/custom_layers.hpp MyLayer::create This method should create an instance of you layer and return cv::Ptr with it. - Output blobs' shape computation -@snippet dnn/custom_layers.cpp MyLayer::getMemoryShapes +@snippet dnn/custom_layers.hpp MyLayer::getMemoryShapes Returns layer's output shapes depends on input shapes. You may request an extra memory using `internals`. - Run a layer -@snippet dnn/custom_layers.cpp MyLayer::forward +@snippet dnn/custom_layers.hpp MyLayer::forward Implement a layer's logic here. Compute outputs for given inputs. @@ -74,7 +74,7 @@ the second invocation of `forward` will has the same data at `outputs` and `inte - Optional `finalize` method -@snippet dnn/custom_layers.cpp MyLayer::finalize +@snippet dnn/custom_layers.hpp MyLayer::finalize The chain of methods are the following: OpenCV deep learning engine calls `create` method once then it calls `getMemoryShapes` for an every created layer then you @@ -108,11 +108,11 @@ layer { This way our implementation can look like: -@snippet dnn/custom_layers.cpp InterpLayer +@snippet dnn/custom_layers.hpp InterpLayer Next we need to register a new layer type and try to import the model. -@snippet dnn/custom_layers.cpp Register InterpLayer +@snippet dnn/custom_layers.hpp Register InterpLayer ## Example: custom layer from TensorFlow This is an example of how to import a network with [tf.image.resize_bilinear](https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_bilinear) @@ -185,11 +185,11 @@ Custom layers import from TensorFlow is designed to put all layer's `attr` into cv::dnn::LayerParams but input `Const` blobs into cv::dnn::Layer::blobs. In our case resize's output shape will be stored in layer's `blobs[0]`. -@snippet dnn/custom_layers.cpp ResizeBilinearLayer +@snippet dnn/custom_layers.hpp ResizeBilinearLayer Next we register a layer and try to import the model. -@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer +@snippet dnn/custom_layers.hpp Register ResizeBilinearLayer ## Define a custom layer in Python The following example shows how to customize OpenCV's layers in Python. diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 7f8c7e7499..6ac2f1a7fe 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -826,6 +826,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_OUT std::vector& indices, const float eta = 1.f, const int top_k = 0); + CV_EXPORTS void NMSBoxes(const std::vector& bboxes, const std::vector& scores, + const float score_threshold, const float nms_threshold, + CV_OUT std::vector& indices, + const float eta = 1.f, const int top_k = 0); //! @} CV__DNN_EXPERIMENTAL_NS_END diff --git a/modules/dnn/src/nms.cpp b/modules/dnn/src/nms.cpp index 3adaef165d..2ce1257cad 100644 --- a/modules/dnn/src/nms.cpp +++ b/modules/dnn/src/nms.cpp @@ -8,6 +8,8 @@ #include "precomp.hpp" #include "nms.inl.hpp" +#include + namespace cv { namespace dnn @@ -28,6 +30,28 @@ void NMSBoxes(const std::vector& bboxes, const std::vector& scores, NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rectOverlap); } +static inline float rotatedRectIOU(const RotatedRect& a, const RotatedRect& b) +{ + std::vector inter, hull; + int res = rotatedRectangleIntersection(a, b, inter); + if (inter.empty() || res == INTERSECT_NONE) + return 0.0f; + if (res == INTERSECT_FULL) + return 1.0f; + convexHull(inter, hull); + float interArea = contourArea(hull); + return interArea / (a.size.area() + b.size.area() - interArea); +} + +void NMSBoxes(const std::vector& bboxes, const std::vector& scores, + const float score_threshold, const float nms_threshold, + std::vector& indices, const float eta, const int top_k) +{ + CV_Assert(bboxes.size() == scores.size(), score_threshold >= 0, + nms_threshold >= 0, eta > 0); + NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rotatedRectIOU); +} + CV__DNN_EXPERIMENTAL_NS_END }// dnn }// cv diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index 677f57ab7d..9208588e65 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -538,6 +538,37 @@ public: } }; +// In case of resizing by factor. +class ResizeBilinearSubgraph : public Subgraph +{ +public: + ResizeBilinearSubgraph() + { + int input = addNodeToMatch(""); + + int shape = addNodeToMatch("Shape", input); + int stack = addNodeToMatch("Const"); + int stack_1 = addNodeToMatch("Const"); + int stack_2 = addNodeToMatch("Const"); + int strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2); + int factorY = addNodeToMatch("Const"); + int mul = addNodeToMatch("Mul", strided_slice, factorY); + + shape = addNodeToMatch("Shape", input); + stack = addNodeToMatch("Const"); + stack_1 = addNodeToMatch("Const"); + stack_2 = addNodeToMatch("Const"); + strided_slice = addNodeToMatch("StridedSlice", shape, stack, stack_1, stack_2); + int factorX = addNodeToMatch("Const"); + int mul_1 = addNodeToMatch("Mul", strided_slice, factorX); + + int pack = addNodeToMatch("Pack", mul, mul_1); + + addNodeToMatch("ResizeBilinear", input, pack); + setFusedNode("ResizeBilinear", input, factorY, factorX); + } +}; + void simplifySubgraphs(tensorflow::GraphDef& net) { std::vector > subgraphs; @@ -551,6 +582,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net) subgraphs.push_back(Ptr(new L2NormalizeSubgraph())); subgraphs.push_back(Ptr(new DeconvolutionValidKerasSubgraph())); subgraphs.push_back(Ptr(new DeconvolutionSameKerasSubgraph())); + subgraphs.push_back(Ptr(new ResizeBilinearSubgraph())); int numNodes = net.node_size(); std::vector matchedNodesIds; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 667e573705..efedbceb48 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -767,6 +767,26 @@ void TFImporter::populateNet(Net dstNet) } } } + else if (type == "Sub") + { + bool haveConst = false; + for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii) + { + Pin input = parsePin(layer.input(ii)); + haveConst = value_id.find(input.name) != value_id.end(); + } + CV_Assert(haveConst); + + layerParams.blobs.resize(1); + blobFromTensor(getConstBlob(layer, value_id), layerParams.blobs[0]); + layerParams.blobs[0] *= -1; + + int id = dstNet.addLayer(name, "Shift", layerParams); + layer_id[name] = id; + + // one input only + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + } else if (type == "MatMul") { CV_Assert(layer.input_size() == 2); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 397aadfa08..64cfcb932a 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -373,9 +373,24 @@ public: ResizeBilinearLayer(const LayerParams ¶ms) : Layer(params) { CV_Assert(!params.get("align_corners", false)); - CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1); - outHeight = blobs[0].at(0, 0); - outWidth = blobs[0].at(0, 1); + CV_Assert(!blobs.empty()); + + for (size_t i = 0; i < blobs.size(); ++i) + CV_Assert(blobs[i].type() == CV_32SC1); + + if (blobs.size() == 1) + { + CV_Assert(blobs[0].total() == 2); + outHeight = blobs[0].at(0, 0); + outWidth = blobs[0].at(0, 1); + } + else + { + CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1); + factorHeight = blobs[0].at(0, 0); + factorWidth = blobs[1].at(0, 0); + outHeight = outWidth = 0; + } } static Ptr create(LayerParams& params) @@ -391,12 +406,21 @@ public: std::vector outShape(4); outShape[0] = inputs[0][0]; // batch size outShape[1] = inputs[0][1]; // number of channels - outShape[2] = outHeight; - outShape[3] = outWidth; + outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight); + outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth); outputs.assign(1, outShape); return false; } + virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE + { + if (!outWidth && !outHeight) + { + outHeight = outputs[0].size[2]; + outWidth = outputs[0].size[3]; + } + } + // This implementation is based on a reference implementation from // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE @@ -447,13 +471,51 @@ private: return x + size[3] * (y + size[2] * (c + size[1] * b)); } - int outWidth, outHeight; + int outWidth, outHeight, factorWidth, factorHeight; }; TEST(Test_TensorFlow, resize_bilinear) { CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); runTensorFlowNet("resize_bilinear"); + runTensorFlowNet("resize_bilinear_factor"); + LayerFactory::unregisterLayer("ResizeBilinear"); +} + +// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png') +// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3) +// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'), +// sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')], +// feed_dict={'input_images:0': inp}) +// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2)) +// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2)) +// np.save('east_text_detection.scores.npy', scores) +// np.save('east_text_detection.geometry.npy', geometry) +TEST(Test_TensorFlow, EAST_text_detection) +{ + CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); + std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false); + std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false); + std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false); + std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false); + + Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false)); + + Mat img = imread(imgPath); + Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false); + net.setInput(inp); + + std::vector outs; + std::vector outNames(2); + outNames[0] = "feature_fusion/Conv_7/Sigmoid"; + outNames[1] = "feature_fusion/concat_3"; + net.forward(outs, outNames); + + Mat scores = outs[0]; + Mat geometry = outs[1]; + + normAssert(scores, blobFromNPY(refScoresPath), "scores"); + normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 5e-5, 1e-3); LayerFactory::unregisterLayer("ResizeBilinear"); } diff --git a/samples/cpp/tutorial_code/dnn/custom_layers.cpp b/samples/dnn/custom_layers.hpp similarity index 81% rename from samples/cpp/tutorial_code/dnn/custom_layers.cpp rename to samples/dnn/custom_layers.hpp index 217e53659f..918cc8ae46 100644 --- a/samples/cpp/tutorial_code/dnn/custom_layers.cpp +++ b/samples/dnn/custom_layers.hpp @@ -1,35 +1,8 @@ -#include - -//! [A custom layer interface] -class MyLayer : public cv::dnn::Layer -{ -public: - //! [MyLayer::MyLayer] - MyLayer(const cv::dnn::LayerParams ¶ms); - //! [MyLayer::MyLayer] - - //! [MyLayer::create] - static cv::Ptr create(cv::dnn::LayerParams& params); - //! [MyLayer::create] - - //! [MyLayer::getMemoryShapes] - virtual bool getMemoryShapes(const std::vector > &inputs, - const int requiredOutputs, - std::vector > &outputs, - std::vector > &internals) const CV_OVERRIDE; - //! [MyLayer::getMemoryShapes] - - //! [MyLayer::forward] - virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE; - //! [MyLayer::forward] - - //! [MyLayer::finalize] - virtual void finalize(const std::vector &inputs, std::vector &outputs) CV_OVERRIDE; - //! [MyLayer::finalize] +#ifndef __OPENCV_SAMPLES_DNN_CUSTOM_LAYERS__ +#define __OPENCV_SAMPLES_DNN_CUSTOM_LAYERS__ - virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE; -}; -//! [A custom layer interface] +#include +#include // getPlane //! [InterpLayer] class InterpLayer : public cv::dnn::Layer @@ -113,15 +86,33 @@ private: //! [InterpLayer] //! [ResizeBilinearLayer] -class ResizeBilinearLayer : public cv::dnn::Layer +class ResizeBilinearLayer CV_FINAL : public cv::dnn::Layer { public: ResizeBilinearLayer(const cv::dnn::LayerParams ¶ms) : Layer(params) { CV_Assert(!params.get("align_corners", false)); - CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1); - outHeight = blobs[0].at(0, 0); - outWidth = blobs[0].at(0, 1); + CV_Assert(!blobs.empty()); + + for (size_t i = 0; i < blobs.size(); ++i) + CV_Assert(blobs[i].type() == CV_32SC1); + + // There are two cases of input blob: a single blob which contains output + // shape and two blobs with scaling factors. + if (blobs.size() == 1) + { + CV_Assert(blobs[0].total() == 2); + outHeight = blobs[0].at(0, 0); + outWidth = blobs[0].at(0, 1); + factorHeight = factorWidth = 0; + } + else + { + CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1); + factorHeight = blobs[0].at(0, 0); + factorWidth = blobs[1].at(0, 0); + outHeight = outWidth = 0; + } } static cv::Ptr create(cv::dnn::LayerParams& params) @@ -130,25 +121,32 @@ public: } virtual bool getMemoryShapes(const std::vector > &inputs, - const int requiredOutputs, + const int, std::vector > &outputs, - std::vector > &internals) const CV_OVERRIDE + std::vector > &) const CV_OVERRIDE { - CV_UNUSED(requiredOutputs); CV_UNUSED(internals); std::vector outShape(4); outShape[0] = inputs[0][0]; // batch size outShape[1] = inputs[0][1]; // number of channels - outShape[2] = outHeight; - outShape[3] = outWidth; + outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight); + outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth); outputs.assign(1, outShape); return false; } + virtual void finalize(const std::vector&, std::vector &outputs) CV_OVERRIDE + { + if (!outWidth && !outHeight) + { + outHeight = outputs[0].size[2]; + outWidth = outputs[0].size[3]; + } + } + // This implementation is based on a reference implementation from // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h - virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &) CV_OVERRIDE { - CV_UNUSED(internals); cv::Mat& inp = *inputs[0]; cv::Mat& out = outputs[0]; const float* inpData = (float*)inp.data; @@ -195,19 +193,54 @@ private: return x + size[3] * (y + size[2] * (c + size[1] * b)); } - int outWidth, outHeight; + int outWidth, outHeight, factorWidth, factorHeight; }; //! [ResizeBilinearLayer] +// +// The folowing code is used only to generate tutorials documentation. +// + +//! [A custom layer interface] +class MyLayer : public cv::dnn::Layer +{ +public: + //! [MyLayer::MyLayer] + MyLayer(const cv::dnn::LayerParams ¶ms); + //! [MyLayer::MyLayer] + + //! [MyLayer::create] + static cv::Ptr create(cv::dnn::LayerParams& params); + //! [MyLayer::create] + + //! [MyLayer::getMemoryShapes] + virtual bool getMemoryShapes(const std::vector > &inputs, + const int requiredOutputs, + std::vector > &outputs, + std::vector > &internals) const CV_OVERRIDE; + //! [MyLayer::getMemoryShapes] + + //! [MyLayer::forward] + virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE; + //! [MyLayer::forward] + + //! [MyLayer::finalize] + virtual void finalize(const std::vector &inputs, std::vector &outputs) CV_OVERRIDE; + //! [MyLayer::finalize] + + virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals) CV_OVERRIDE; +}; +//! [A custom layer interface] + //! [Register a custom layer] -#include // CV_DNN_REGISTER_LAYER_CLASS macro +#include // CV_DNN_REGISTER_LAYER_CLASS -int main(int argc, char** argv) +static inline void loadNet() { - CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer); + CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer); // ... //! [Register a custom layer] - CV_UNUSED(argc); CV_UNUSED(argv); + //! [Register InterpLayer] CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer); cv::dnn::Net caffeNet = cv::dnn::readNet("/path/to/config.prototxt", "/path/to/weights.caffemodel"); @@ -217,16 +250,8 @@ int main(int argc, char** argv) CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); cv::dnn::Net tfNet = cv::dnn::readNet("/path/to/graph.pb"); //! [Register ResizeBilinearLayer] -} -cv::Ptr MyLayer::create(cv::dnn::LayerParams& params) -{ - return cv::Ptr(new MyLayer(params)); + if (false) loadNet(); // To prevent unused function warning. } -MyLayer::MyLayer(const cv::dnn::LayerParams&) {} -bool MyLayer::getMemoryShapes(const std::vector >&, const int, - std::vector >&, - std::vector >&) const { return false; } -void MyLayer::forward(std::vector&, std::vector&, std::vector&) {} -void MyLayer::finalize(const std::vector&, std::vector&) {} -void MyLayer::forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {} + +#endif // __OPENCV_SAMPLES_DNN_CUSTOM_LAYERS__ diff --git a/samples/dnn/text_detection.cpp b/samples/dnn/text_detection.cpp new file mode 100644 index 0000000000..48157d8a13 --- /dev/null +++ b/samples/dnn/text_detection.cpp @@ -0,0 +1,169 @@ +#include +#include +#include + +#include "custom_layers.hpp" + +using namespace cv; +using namespace cv::dnn; + +const char* keys = + "{ help h | | Print help message. }" + "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}" + "{ model m | | Path to a binary .pb file contains trained network.}" + "{ width | 320 | Preprocess input image by resizing to a specific width. It should be multiple by 32. }" + "{ height | 320 | Preprocess input image by resizing to a specific height. It should be multiple by 32. }" + "{ thr | 0.5 | Confidence threshold. }" + "{ nms | 0.4 | Non-maximum suppression threshold. }"; + +void decode(const Mat& scores, const Mat& geometry, float scoreThresh, + std::vector& detections, std::vector& confidences); + +int main(int argc, char** argv) +{ + // Parse command line arguments. + CommandLineParser parser(argc, argv, keys); + parser.about("Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of " + "EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)"); + if (argc == 1 || parser.has("help")) + { + parser.printMessage(); + return 0; + } + + float confThreshold = parser.get("thr"); + float nmsThreshold = parser.get("nms"); + int inpWidth = parser.get("width"); + int inpHeight = parser.get("height"); + CV_Assert(parser.has("model")); + String model = parser.get("model"); + + // Register a custom layer. + CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); + + // Load network. + Net net = readNet(model); + + // Open a video file or an image file or a camera stream. + VideoCapture cap; + if (parser.has("input")) + cap.open(parser.get("input")); + else + cap.open(0); + + static const std::string kWinName = "EAST: An Efficient and Accurate Scene Text Detector"; + namedWindow(kWinName, WINDOW_NORMAL); + + std::vector outs; + std::vector outNames(2); + outNames[0] = "feature_fusion/Conv_7/Sigmoid"; + outNames[1] = "feature_fusion/concat_3"; + + Mat frame, blob; + while (waitKey(1) < 0) + { + cap >> frame; + if (frame.empty()) + { + waitKey(); + break; + } + + blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false); + net.setInput(blob); + net.forward(outs, outNames); + + Mat scores = outs[0]; + Mat geometry = outs[1]; + + // Decode predicted bounding boxes. + std::vector boxes; + std::vector confidences; + decode(scores, geometry, confThreshold, boxes, confidences); + + // Apply non-maximum suppression procedure. + std::vector indices; + NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); + + // Render detections. + Point2f ratio((float)frame.cols / inpWidth, (float)frame.rows / inpHeight); + for (size_t i = 0; i < indices.size(); ++i) + { + RotatedRect& box = boxes[indices[i]]; + + Point2f vertices[4]; + box.points(vertices); + for (int j = 0; j < 4; ++j) + { + vertices[j].x *= ratio.x; + vertices[j].y *= ratio.y; + } + for (int j = 0; j < 4; ++j) + line(frame, vertices[j], vertices[(j + 1) % 4], Scalar(0, 255, 0), 1); + } + + // Put efficiency information. + std::vector layersTimes; + double freq = getTickFrequency() / 1000; + double t = net.getPerfProfile(layersTimes) / freq; + std::string label = format("Inference time: %.2f ms", t); + putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); + + imshow(kWinName, frame); + } + return 0; +} + +void decode(const Mat& scores, const Mat& geometry, float scoreThresh, + std::vector& detections, std::vector& confidences) +{ + detections.clear(); + CV_Assert(scores.dims == 4, geometry.dims == 4, scores.size[0] == 1, + geometry.size[0] == 1, scores.size[1] == 1, geometry.size[1] == 5, + scores.size[2] == geometry.size[2], scores.size[3] == geometry.size[3]); + + const int height = scores.size[2]; + const int width = scores.size[3]; + const int planeSize = height * width; + + float* scoresData = (float*)scores.data; + float* geometryData = (float*)geometry.data; + float* x0_data = geometryData; + float* x1_data = geometryData + planeSize; + float* x2_data = geometryData + planeSize * 2; + float* x3_data = geometryData + planeSize * 3; + float* anglesData = geometryData + planeSize * 4; + for (int y = 0; y < height; ++y) + { + for (int x = 0; x < width; ++x) + { + float score = scoresData[x]; + if (score < scoreThresh) + continue; + + // Decode a prediction. + + // Multiple by 4 because feature maps are 4 time less than input image. + float offsetX = x * 4.0f, offsetY = y * 4.0f; + float angle = anglesData[x]; + float cosA = std::cos(angle); + float sinA = std::sin(angle); + float h = x0_data[x] + x2_data[x]; + float w = x1_data[x] + x3_data[x]; + + Point2f offset(offsetX + cosA * x1_data[x] + sinA * x2_data[x], + offsetY - sinA * x1_data[x] + cosA * x2_data[x]); + Point2f p1 = Point2f(-sinA * h, -cosA * h) + offset; + Point2f p3 = Point2f(-cosA * w, sinA * w) + offset; + RotatedRect r(0.5f * (p1 + p3), Size2f(w, h), -angle * 180.0f / (float)CV_PI); + detections.push_back(r); + confidences.push_back(score); + } + scoresData += width; + x0_data += width; + x1_data += width; + x2_data += width; + x3_data += width; + anglesData += width; + } +} From 07dc6d2b450e7931c6f6970eb6cde600b28a99e2 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Mon, 30 Apr 2018 21:51:33 +0300 Subject: [PATCH 2/2] Return a convex hull from rotatedRectangleIntersection --- modules/dnn/src/nms.cpp | 5 +- modules/imgproc/src/intersection.cpp | 14 +- modules/imgproc/test/test_intersection.cpp | 459 ++++++++------------- samples/dnn/text_detection.cpp | 22 +- 4 files changed, 188 insertions(+), 312 deletions(-) diff --git a/modules/dnn/src/nms.cpp b/modules/dnn/src/nms.cpp index 2ce1257cad..62bda79c15 100644 --- a/modules/dnn/src/nms.cpp +++ b/modules/dnn/src/nms.cpp @@ -32,14 +32,13 @@ void NMSBoxes(const std::vector& bboxes, const std::vector& scores, static inline float rotatedRectIOU(const RotatedRect& a, const RotatedRect& b) { - std::vector inter, hull; + std::vector inter; int res = rotatedRectangleIntersection(a, b, inter); if (inter.empty() || res == INTERSECT_NONE) return 0.0f; if (res == INTERSECT_FULL) return 1.0f; - convexHull(inter, hull); - float interArea = contourArea(hull); + float interArea = contourArea(inter); return interArea / (a.size.area() + b.size.area() - interArea); } diff --git a/modules/imgproc/src/intersection.cpp b/modules/imgproc/src/intersection.cpp index 5da743a0f9..3e4a266b30 100644 --- a/modules/imgproc/src/intersection.cpp +++ b/modules/imgproc/src/intersection.cpp @@ -219,13 +219,15 @@ int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& r } } - // Get rid of dupes + // Get rid of dupes and order points. for( int i = 0; i < (int)intersection.size()-1; i++ ) { + float dx1 = intersection[i + 1].x - intersection[i].x; + float dy1 = intersection[i + 1].y - intersection[i].y; for( size_t j = i+1; j < intersection.size(); j++ ) { - float dx = intersection[i].x - intersection[j].x; - float dy = intersection[i].y - intersection[j].y; + float dx = intersection[j].x - intersection[i].x; + float dy = intersection[j].y - intersection[i].y; double d2 = dx*dx + dy*dy; // can be a really small number, need double here if( d2 < samePointEps*samePointEps ) @@ -235,6 +237,12 @@ int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& r intersection.pop_back(); j--; // restart check } + else if (dx1 * dy - dy1 * dx < 0) + { + std::swap(intersection[i + 1], intersection[j]); + dx1 = dx; + dy1 = dy; + } } } diff --git a/modules/imgproc/test/test_intersection.cpp b/modules/imgproc/test/test_intersection.cpp index e2d8f18e33..8d770d06ea 100644 --- a/modules/imgproc/test/test_intersection.cpp +++ b/modules/imgproc/test/test_intersection.cpp @@ -66,8 +66,27 @@ private: void test7(); void test8(); void test9(); + void test10(); + void test11(); + void test12(); + void test13(); + void test14(); }; +static void compare(const std::vector& test, const std::vector& target) +{ + ASSERT_EQ(test.size(), target.size()); + ASSERT_TRUE(test.size() < 4 || isContourConvex(test)); + ASSERT_TRUE(target.size() < 4 || isContourConvex(target)); + for( size_t i = 0; i < test.size(); i++ ) + { + double dx = test[i].x - target[i].x; + double dy = test[i].y - target[i].y; + double r = sqrt(dx*dx + dy*dy); + ASSERT_LT(r, ACCURACY); + } +} + void CV_RotatedRectangleIntersectionTest::run(int) { // See pics/intersection.png for the scenarios we are testing @@ -92,28 +111,20 @@ void CV_RotatedRectangleIntersectionTest::run(int) test7(); test8(); test9(); + test10(); + test11(); + test12(); + test13(); + test14(); } void CV_RotatedRectangleIntersectionTest::test1() { // no intersection - - RotatedRect rect1, rect2; - - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 12.0f; - - rect2.center.x = 10; - rect2.center.y = 10; - rect2.size.width = 2; - rect2.size.height = 2; - rect2.angle = 34.0f; + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 12.0f); + RotatedRect rect2(Point2f(10, 10), Size2f(2, 2), 34.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_NONE); @@ -123,375 +134,243 @@ void CV_RotatedRectangleIntersectionTest::test1() void CV_RotatedRectangleIntersectionTest::test2() { // partial intersection, rectangles translated - - RotatedRect rect1, rect2; - - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 0; - - rect2.center.x = 1; - rect2.center.y = 1; - rect2.size.width = 2; - rect2.size.height = 2; - rect2.angle = 0; + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(1, 1), Size2f(2, 2), 0.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_PARTIAL); - CV_Assert(vertices.size() == 4); - - vector possibleVertices(4); - possibleVertices[0] = Point2f(0.0f, 0.0f); - possibleVertices[1] = Point2f(1.0f, 1.0f); - possibleVertices[2] = Point2f(0.0f, 1.0f); - possibleVertices[3] = Point2f(1.0f, 0.0f); - - for( size_t i = 0; i < vertices.size(); i++ ) - { - double bestR = DBL_MAX; - - for( size_t j = 0; j < possibleVertices.size(); j++ ) - { - double dx = vertices[i].x - possibleVertices[j].x; - double dy = vertices[i].y - possibleVertices[j].y; - double r = sqrt(dx*dx + dy*dy); - - bestR = std::min(bestR, r); - } - - CV_Assert(bestR < ACCURACY); - } + vector targetVertices(4); + targetVertices[0] = Point2f(1.0f, 0.0f); + targetVertices[1] = Point2f(1.0f, 1.0f); + targetVertices[2] = Point2f(0.0f, 1.0f); + targetVertices[3] = Point2f(0.0f, 0.0f); + compare(vertices, targetVertices); } void CV_RotatedRectangleIntersectionTest::test3() { // partial intersection, rectangles rotated 45 degree on the corner, forms a triangle intersection - RotatedRect rect1, rect2; - - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 0; - - rect2.center.x = 1; - rect2.center.y = 1; - rect2.size.width = sqrt(2.0f); - rect2.size.height = 20; - rect2.angle = 45.0f; + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(1, 1), Size2f(sqrt(2.0f), 20), 45.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_PARTIAL); - CV_Assert(vertices.size() == 3); - - vector possibleVertices(3); - - possibleVertices[0] = Point2f(1.0f, 1.0f); - possibleVertices[1] = Point2f(0.0f, 1.0f); - possibleVertices[2] = Point2f(1.0f, 0.0f); - - for( size_t i = 0; i < vertices.size(); i++ ) - { - double bestR = DBL_MAX; - - for( size_t j = 0; j < possibleVertices.size(); j++ ) - { - double dx = vertices[i].x - possibleVertices[j].x; - double dy = vertices[i].y - possibleVertices[j].y; - double r = sqrt(dx*dx + dy*dy); - bestR = std::min(bestR, r); - } - - CV_Assert(bestR < ACCURACY); - } + vector targetVertices(3); + targetVertices[0] = Point2f(1.0f, 0.0f); + targetVertices[1] = Point2f(1.0f, 1.0f); + targetVertices[2] = Point2f(0.0f, 1.0f); + compare(vertices, targetVertices); } void CV_RotatedRectangleIntersectionTest::test4() { // full intersection, rectangles of same size directly on top of each other - - RotatedRect rect1, rect2; - - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 0; - - rect2.center.x = 0; - rect2.center.y = 0; - rect2.size.width = 2; - rect2.size.height = 2; - rect2.angle = 0; + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(0, 0), Size2f(2, 2), 0.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_FULL); - CV_Assert(vertices.size() == 4); - - vector possibleVertices(4); - - possibleVertices[0] = Point2f(-1.0f, 1.0f); - possibleVertices[1] = Point2f(1.0f, -1.0f); - possibleVertices[2] = Point2f(-1.0f, -1.0f); - possibleVertices[3] = Point2f(1.0f, 1.0f); - - for( size_t i = 0; i < vertices.size(); i++ ) - { - double bestR = DBL_MAX; - - for( size_t j = 0; j < possibleVertices.size(); j++ ) - { - double dx = vertices[i].x - possibleVertices[j].x; - double dy = vertices[i].y - possibleVertices[j].y; - double r = sqrt(dx*dx + dy*dy); - - bestR = std::min(bestR, r); - } - CV_Assert(bestR < ACCURACY); - } + vector targetVertices(4); + targetVertices[0] = Point2f(-1.0f, 1.0f); + targetVertices[1] = Point2f(-1.0f, -1.0f); + targetVertices[2] = Point2f(1.0f, -1.0f); + targetVertices[3] = Point2f(1.0f, 1.0f); + compare(vertices, targetVertices); } void CV_RotatedRectangleIntersectionTest::test5() { // partial intersection, rectangle on top rotated 45 degrees - - RotatedRect rect1, rect2; - - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 0; - - rect2.center.x = 0; - rect2.center.y = 0; - rect2.size.width = 2; - rect2.size.height = 2; - rect2.angle = 45.0f; + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(0, 0), Size2f(2, 2), 45.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_PARTIAL); - CV_Assert(vertices.size() == 8); - - vector possibleVertices(8); - possibleVertices[0] = Point2f(-1.0f, -0.414214f); - possibleVertices[1] = Point2f(-1.0f, 0.414214f); - possibleVertices[2] = Point2f(-0.414214f, -1.0f); - possibleVertices[3] = Point2f(0.414214f, -1.0f); - possibleVertices[4] = Point2f(1.0f, -0.414214f); - possibleVertices[5] = Point2f(1.0f, 0.414214f); - possibleVertices[6] = Point2f(0.414214f, 1.0f); - possibleVertices[7] = Point2f(-0.414214f, 1.0f); - - for( size_t i = 0; i < vertices.size(); i++ ) - { - double bestR = DBL_MAX; - - for( size_t j = 0; j < possibleVertices.size(); j++ ) - { - double dx = vertices[i].x - possibleVertices[j].x; - double dy = vertices[i].y - possibleVertices[j].y; - double r = sqrt(dx*dx + dy*dy); - - bestR = std::min(bestR, r); - } - - CV_Assert(bestR < ACCURACY); - } + vector targetVertices(8); + targetVertices[0] = Point2f(-1.0f, -0.414214f); + targetVertices[1] = Point2f(-0.414214f, -1.0f); + targetVertices[2] = Point2f(0.414214f, -1.0f); + targetVertices[3] = Point2f(1.0f, -0.414214f); + targetVertices[4] = Point2f(1.0f, 0.414214f); + targetVertices[5] = Point2f(0.414214f, 1.0f); + targetVertices[6] = Point2f(-0.414214f, 1.0f); + targetVertices[7] = Point2f(-1.0f, 0.414214f); + compare(vertices, targetVertices); } void CV_RotatedRectangleIntersectionTest::test6() { // 6 - partial intersection, rectangle on top of different size - - RotatedRect rect1, rect2; - - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 0; - - rect2.center.x = 0; - rect2.center.y = 0; - rect2.size.width = 2; - rect2.size.height = 10; - rect2.angle = 0; + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(0, 0), Size2f(2, 10), 0.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_PARTIAL); - CV_Assert(vertices.size() == 4); - - vector possibleVertices(4); - possibleVertices[0] = Point2f(1.0f, 1.0f); - possibleVertices[1] = Point2f(1.0f, -1.0f); - possibleVertices[2] = Point2f(-1.0f, -1.0f); - possibleVertices[3] = Point2f(-1.0f, 1.0f); - - for( size_t i = 0; i < vertices.size(); i++ ) - { - double bestR = DBL_MAX; - - for( size_t j = 0; j < possibleVertices.size(); j++ ) - { - double dx = vertices[i].x - possibleVertices[j].x; - double dy = vertices[i].y - possibleVertices[j].y; - double r = sqrt(dx*dx + dy*dy); - - bestR = std::min(bestR, r); - } - - CV_Assert(bestR < ACCURACY); - } + vector targetVertices(4); + targetVertices[0] = Point2f(-1.0f, -1.0f); + targetVertices[1] = Point2f(1.0f, -1.0f); + targetVertices[2] = Point2f(1.0f, 1.0f); + targetVertices[3] = Point2f(-1.0f, 1.0f); + compare(vertices, targetVertices); } void CV_RotatedRectangleIntersectionTest::test7() { // full intersection, rectangle fully enclosed in the other - - RotatedRect rect1, rect2; - - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 12.34f; - rect1.size.height = 56.78f; - rect1.angle = 0; - - rect2.center.x = 0; - rect2.center.y = 0; - rect2.size.width = 2; - rect2.size.height = 2; - rect2.angle = 0; + RotatedRect rect1(Point2f(0, 0), Size2f(12.34f, 56.78f), 0.0f); + RotatedRect rect2(Point2f(0, 0), Size2f(2, 2), 0.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_FULL); - CV_Assert(vertices.size() == 4); - - vector possibleVertices(4); - possibleVertices[0] = Point2f(1.0f, 1.0f); - possibleVertices[1] = Point2f(1.0f, -1.0f); - possibleVertices[2] = Point2f(-1.0f, -1.0f); - possibleVertices[3] = Point2f(-1.0f, 1.0f); - - for( size_t i = 0; i < vertices.size(); i++ ) - { - double bestR = DBL_MAX; + vector targetVertices(4); + targetVertices[0] = Point2f(-1.0f, 1.0f); + targetVertices[1] = Point2f(-1.0f, -1.0f); + targetVertices[2] = Point2f(1.0f, -1.0f); + targetVertices[3] = Point2f(1.0f, 1.0f); + compare(vertices, targetVertices); +} - for( size_t j = 0; j < possibleVertices.size(); j++ ) - { - double dx = vertices[i].x - possibleVertices[j].x; - double dy = vertices[i].y - possibleVertices[j].y; - double r = sqrt(dx*dx + dy*dy); +void CV_RotatedRectangleIntersectionTest::test8() +{ + // intersection by a single vertex + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(2, 2), Size2f(2, 2), 0.0f); - bestR = std::min(bestR, r); - } + vector vertices; + int ret = rotatedRectangleIntersection(rect1, rect2, vertices); - CV_Assert(bestR < ACCURACY); - } + CV_Assert(ret == INTERSECT_PARTIAL); + compare(vertices, vector(1, Point2f(1.0f, 1.0f))); } -void CV_RotatedRectangleIntersectionTest::test8() +void CV_RotatedRectangleIntersectionTest::test9() { // full intersection, rectangle fully enclosed in the other + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(2, 0), Size2f(2, 123.45f), 0.0f); - RotatedRect rect1, rect2; + vector vertices; + int ret = rotatedRectangleIntersection(rect1, rect2, vertices); + + CV_Assert(ret == INTERSECT_PARTIAL); - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 0; + vector targetVertices(2); + targetVertices[0] = Point2f(1.0f, -1.0f); + targetVertices[1] = Point2f(1.0f, 1.0f); + compare(vertices, targetVertices); +} - rect2.center.x = 2; - rect2.center.y = 2; - rect2.size.width = 2; - rect2.size.height = 2; - rect2.angle = 0; +void CV_RotatedRectangleIntersectionTest::test10() +{ + // three points of rect2 are inside rect1. + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(0, 0.5), Size2f(1, 1), 45.0f); vector vertices; - int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_PARTIAL); - CV_Assert(vertices.size() == 1); - - double dx = vertices[0].x - 1; - double dy = vertices[0].y - 1; - double r = sqrt(dx*dx + dy*dy); - CV_Assert(r < ACCURACY); + vector targetVertices(5); + targetVertices[0] = Point2f(0.207107f, 1.0f); + targetVertices[1] = Point2f(-0.207107f, 1.0f); + targetVertices[2] = Point2f(-0.707107f, 0.5f); + targetVertices[3] = Point2f(0.0f, -0.207107f); + targetVertices[4] = Point2f(0.707107f, 0.5f); + compare(vertices, targetVertices); } -void CV_RotatedRectangleIntersectionTest::test9() +void CV_RotatedRectangleIntersectionTest::test11() { - // full intersection, rectangle fully enclosed in the other + RotatedRect rect1(Point2f(0, 0), Size2f(4, 2), 0.0f); + RotatedRect rect2(Point2f(0, 0), Size2f(2, 2), -45.0f); - RotatedRect rect1, rect2; + vector vertices; + int ret = rotatedRectangleIntersection(rect1, rect2, vertices); - rect1.center.x = 0; - rect1.center.y = 0; - rect1.size.width = 2; - rect1.size.height = 2; - rect1.angle = 0; + CV_Assert(ret == INTERSECT_PARTIAL); - rect2.center.x = 2; - rect2.center.y = 0; - rect2.size.width = 2; - rect2.size.height = 123.45f; - rect2.angle = 0; + vector targetVertices(6); + targetVertices[0] = Point2f(-0.414214f, -1.0f); + targetVertices[1] = Point2f(0.414213f, -1.0f); + targetVertices[2] = Point2f(1.41421f, 0.0f); + targetVertices[3] = Point2f(0.414214f, 1.0f); + targetVertices[4] = Point2f(-0.414213f, 1.0f); + targetVertices[5] = Point2f(-1.41421f, 0.0f); + compare(vertices, targetVertices); +} - vector vertices; +void CV_RotatedRectangleIntersectionTest::test12() +{ + RotatedRect rect1(Point2f(0, 0), Size2f(2, 2), 0.0f); + RotatedRect rect2(Point2f(0, 1), Size2f(1, 1), 0.0f); + vector vertices; int ret = rotatedRectangleIntersection(rect1, rect2, vertices); CV_Assert(ret == INTERSECT_PARTIAL); - CV_Assert(vertices.size() == 2); - vector possibleVertices(2); + vector targetVertices(4); + targetVertices[0] = Point2f(-0.5f, 1.0f); + targetVertices[1] = Point2f(-0.5f, 0.5f); + targetVertices[2] = Point2f(0.5f, 0.5f); + targetVertices[3] = Point2f(0.5f, 1.0f); + compare(vertices, targetVertices); +} - possibleVertices[0] = Point2f(1.0f, 1.0f); - possibleVertices[1] = Point2f(1.0f, -1.0f); +void CV_RotatedRectangleIntersectionTest::test13() +{ + RotatedRect rect1(Point2f(0, 0), Size2f(1, 3), 0.0f); + RotatedRect rect2(Point2f(0, 1), Size2f(3, 1), 0.0f); - for( size_t i = 0; i < vertices.size(); i++ ) - { - double bestR = DBL_MAX; + vector vertices; + int ret = rotatedRectangleIntersection(rect1, rect2, vertices); - for( size_t j = 0; j < possibleVertices.size(); j++ ) - { - double dx = vertices[i].x - possibleVertices[j].x; - double dy = vertices[i].y - possibleVertices[j].y; - double r = sqrt(dx*dx + dy*dy); + CV_Assert(ret == INTERSECT_PARTIAL); - bestR = std::min(bestR, r); - } + vector targetVertices(4); + targetVertices[0] = Point2f(-0.5f, 0.5f); + targetVertices[1] = Point2f(0.5f, 0.5f); + targetVertices[2] = Point2f(0.5f, 1.5f); + targetVertices[3] = Point2f(-0.5f, 1.5f); + compare(vertices, targetVertices); +} - CV_Assert(bestR < ACCURACY); +void CV_RotatedRectangleIntersectionTest::test14() +{ + const int kNumTests = 100; + const int kWidth = 5; + const int kHeight = 5; + RotatedRect rects[2]; + std::vector inter; + for (int i = 0; i < kNumTests; ++i) + { + for (int j = 0; j < 2; ++j) + { + rects[j].center = Point2f((float)(rand() % kWidth), (float)(rand() % kHeight)); + rects[j].size = Size2f(rand() % kWidth + 1.0f, rand() % kHeight + 1.0f); + rects[j].angle = (float)(rand() % 360); + } + rotatedRectangleIntersection(rects[0], rects[1], inter); + ASSERT_TRUE(inter.size() < 4 || isContourConvex(inter)); } } diff --git a/samples/dnn/text_detection.cpp b/samples/dnn/text_detection.cpp index 48157d8a13..5abe6b6884 100644 --- a/samples/dnn/text_detection.cpp +++ b/samples/dnn/text_detection.cpp @@ -124,17 +124,14 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, const int height = scores.size[2]; const int width = scores.size[3]; - const int planeSize = height * width; - - float* scoresData = (float*)scores.data; - float* geometryData = (float*)geometry.data; - float* x0_data = geometryData; - float* x1_data = geometryData + planeSize; - float* x2_data = geometryData + planeSize * 2; - float* x3_data = geometryData + planeSize * 3; - float* anglesData = geometryData + planeSize * 4; for (int y = 0; y < height; ++y) { + const float* scoresData = scores.ptr(0, 0, y); + const float* x0_data = geometry.ptr(0, 0, y); + const float* x1_data = geometry.ptr(0, 1, y); + const float* x2_data = geometry.ptr(0, 2, y); + const float* x3_data = geometry.ptr(0, 3, y); + const float* anglesData = geometry.ptr(0, 4, y); for (int x = 0; x < width; ++x) { float score = scoresData[x]; @@ -142,7 +139,6 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, continue; // Decode a prediction. - // Multiple by 4 because feature maps are 4 time less than input image. float offsetX = x * 4.0f, offsetY = y * 4.0f; float angle = anglesData[x]; @@ -159,11 +155,5 @@ void decode(const Mat& scores, const Mat& geometry, float scoreThresh, detections.push_back(r); confidences.push_back(score); } - scoresData += width; - x0_data += width; - x1_data += width; - x2_data += width; - x3_data += width; - anglesData += width; } }