mirror of https://github.com/opencv/opencv.git
Merge pull request #18287 from mpashchenkov:mp/ocv-gapi-blue-branch
[G-API]: Add four kernels to parse NN outputs & provide information in Streaming scenarios * Kernels from GL "blue" branch, acc and perf tests * Code cleanup * Output fix * Comment fix * Added new file for parsers, stylistic corrections * Added end line * Namespace fix * Code cleanup * nnparsers.hpp moved to gapi/infer/, nnparsers -> parsers * Removed cv:: from parsers.hpp
pull/18365/head
parent
830d8d6b75
commit
a63cee2139
16 changed files with 1423 additions and 1 deletions
@ -0,0 +1,125 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
|
||||
#ifndef OPENCV_GAPI_PARSERS_HPP |
||||
#define OPENCV_GAPI_PARSERS_HPP |
||||
|
||||
#include <tuple>   // std::tuple, std::make_tuple
#include <utility>
#include <vector>  // std::vector

#include <opencv2/gapi/gmat.hpp>
#include <opencv2/gapi/gkernel.hpp>
||||
|
||||
namespace cv { namespace gapi { |
||||
namespace nn { |
||||
namespace parsers { |
||||
using GRects = GArray<Rect>; |
||||
using GDetections = std::tuple<GArray<Rect>, GArray<int>>; |
||||
|
||||
// Kernel interface for the SSD parser flavour which reports both boxes and
// labels. Inputs: network output, frame size, confidence threshold and
// label filter; outputs: a (boxes, labels) pair of arrays.
G_TYPED_KERNEL(GParseSSDBL, <GDetections(GMat, GOpaque<Size>, float, int)>,
               "org.opencv.nn.parsers.parseSSD_BL") {
    // The inputs' metadata is ignored: both outputs are always described
    // with an empty array descriptor.
    static std::tuple<GArrayDesc,GArrayDesc> outMeta(const GMatDesc&, const GOpaqueDesc&, float, int) {
        const auto boxes_desc  = empty_array_desc();
        const auto labels_desc = empty_array_desc();
        return std::make_tuple(boxes_desc, labels_desc);
    }
};
||||
|
||||
// Kernel interface for the SSD parser flavour which reports boxes only.
// Inputs: network output, frame size, confidence threshold and the two
// boolean switches (square alignment, out-of-bounds filtering).
G_TYPED_KERNEL(GParseSSD, <GRects(GMat, GOpaque<Size>, float, bool, bool)>,
               "org.opencv.nn.parsers.parseSSD") {
    // The inputs' metadata is ignored: the output is always described
    // with an empty array descriptor.
    static GArrayDesc outMeta(const GMatDesc&, const GOpaqueDesc&, float, bool, bool) {
        const auto boxes_desc = empty_array_desc();
        return boxes_desc;
    }
};
||||
|
||||
// Kernel interface for the Yolo parser. Inputs: network output, frame size,
// confidence threshold, NMS threshold and the anchors; outputs: a
// (boxes, labels) pair of arrays.
G_TYPED_KERNEL(GParseYolo, <GDetections(GMat, GOpaque<Size>, float, float, std::vector<float>)>,
               "org.opencv.nn.parsers.parseYolo") {
    // The inputs' metadata is ignored: both outputs are always described
    // with an empty array descriptor.
    static std::tuple<GArrayDesc, GArrayDesc> outMeta(const GMatDesc&, const GOpaqueDesc&,
                                                      float, float, const std::vector<float>&) {
        const auto boxes_desc  = empty_array_desc();
        const auto labels_desc = empty_array_desc();
        return std::make_tuple(boxes_desc, labels_desc);
    }

    // Anchors used when the caller of parseYolo() does not supply its own.
    // Ten values; the parser consumes them in pairs, one pair per box.
    static const std::vector<float>& defaultAnchors() {
        static const std::vector<float> default_anchors {
            0.57273f, 0.677385f,
            1.87446f, 2.06253f,
            3.33843f, 5.47434f,
            7.88282f, 3.52778f,
            9.77052f, 9.16828f
        };
        return default_anchors;
    }
};
||||
} // namespace parsers
|
||||
} // namespace nn
|
||||
|
||||
/** @brief Parses output of SSD network.
|
||||
|
||||
Extracts detection information (box, confidence, label) from SSD output and |
||||
filters it by given confidence and label. |
||||
|
||||
@note Function textual ID is "org.opencv.nn.parsers.parseSSD_BL" |
||||
|
||||
@param in Input CV_32F tensor with {1,1,N,7} dimensions. |
||||
@param inSz Size to project detected boxes to (size of the input image). |
||||
@param confidenceThreshold If confidence of the |
||||
detection is smaller than confidence threshold, detection is rejected. |
||||
@param filterLabel If provided (!= -1), only detections with |
||||
given label will get to the output. |
||||
@return a tuple with a vector of detected boxes and a vector of appropriate labels. |
||||
*/ |
||||
GAPI_EXPORTS std::tuple<GArray<Rect>, GArray<int>> parseSSD(const GMat& in, |
||||
const GOpaque<Size>& inSz, |
||||
const float confidenceThreshold = 0.5f, |
||||
const int filterLabel = -1); |
||||
|
||||
/** @overload
|
||||
Extracts detection information (box, confidence) from SSD output and |
||||
filters it by given confidence and by going out of bounds. |
||||
|
||||
@note Function textual ID is "org.opencv.nn.parsers.parseSSD" |
||||
|
||||
@param in Input CV_32F tensor with {1,1,N,7} dimensions. |
||||
@param inSz Size to project detected boxes to (size of the input image). |
||||
@param confidenceThreshold If confidence of the |
||||
detection is smaller than confidence threshold, detection is rejected. |
||||
@param alignmentToSquare If provided true, bounding boxes are extended to squares. |
||||
The center of the rectangle remains unchanged, the side of the square is |
||||
the larger side of the rectangle. |
||||
@param filterOutOfBounds If provided true, out-of-frame boxes are filtered. |
||||
@return a vector of detected bounding boxes. |
||||
*/ |
||||
GAPI_EXPORTS GArray<Rect> parseSSD(const GMat& in, |
||||
const GOpaque<Size>& inSz, |
||||
const float confidenceThreshold = 0.5f, |
||||
const bool alignmentToSquare = false, |
||||
const bool filterOutOfBounds = false); |
||||
|
||||
/** @brief Parses output of Yolo network.
|
||||
|
||||
Extracts detection information (box, confidence, label) from Yolo output, |
||||
filters it by given confidence and performs non-maximum supression for overlapping boxes. |
||||
|
||||
@note Function textual ID is "org.opencv.nn.parsers.parseYolo" |
||||
|
||||
@param in Input CV_32F tensor with {1,13,13,N} dimensions, N should satisfy: |
||||
\f[\texttt{N} = (\texttt{num_classes} + \texttt{5}) * \texttt{5},\f] |
||||
where num_classes - a number of classes Yolo network was trained with. |
||||
@param inSz Size to project detected boxes to (size of the input image). |
||||
@param confidenceThreshold If confidence of the |
||||
detection is smaller than confidence threshold, detection is rejected. |
||||
@param nmsThreshold Non-maximum supression threshold which controls minimum |
||||
relative box intersection area required for rejecting the box with a smaller confidence. |
||||
If 1.f, nms is not performed and no boxes are rejected. |
||||
@param anchors Anchors Yolo network was trained with. |
||||
@note The default anchor values are taken from openvinotoolkit docs: |
||||
https://docs.openvinotoolkit.org/latest/omz_models_intel_yolo_v2_tiny_vehicle_detection_0001_description_yolo_v2_tiny_vehicle_detection_0001.html#output.
|
||||
@return a tuple with a vector of detected boxes and a vector of appropriate labels. |
||||
*/ |
||||
GAPI_EXPORTS std::tuple<GArray<Rect>, GArray<int>> parseYolo(const GMat& in, |
||||
const GOpaque<Size>& inSz, |
||||
const float confidenceThreshold = 0.5f, |
||||
const float nmsThreshold = 0.5f, |
||||
const std::vector<float>& anchors |
||||
= nn::parsers::GParseYolo::defaultAnchors()); |
||||
|
||||
} // namespace gapi
|
||||
} // namespace cv
|
||||
|
||||
#endif // OPENCV_GAPI_PARSERS_HPP
|
@ -0,0 +1,44 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#include <opencv2/gapi/infer/parsers.hpp> |
||||
|
||||
#include <tuple> |
||||
#include <numeric> |
||||
|
||||
namespace cv { namespace gapi { |
||||
|
||||
// Boxes + labels flavour of parseSSD(): forwards all the arguments, in the
// same order, to GParseSSDBL::on() and returns its result.
nn::parsers::GDetections parseSSD(const GMat& in,
                                  const GOpaque<Size>& inSz,
                                  const float confidenceThreshold,
                                  const int filterLabel)
{
    using Kernel = nn::parsers::GParseSSDBL;
    return Kernel::on(in, inSz, confidenceThreshold, filterLabel);
}
||||
|
||||
// Boxes-only flavour of parseSSD(): forwards all the arguments, in the
// same order, to GParseSSD::on() and returns its result.
nn::parsers::GRects parseSSD(const GMat& in,
                             const GOpaque<Size>& inSz,
                             const float confidenceThreshold,
                             const bool alignmentToSquare,
                             const bool filterOutOfBounds)
{
    using Kernel = nn::parsers::GParseSSD;
    return Kernel::on(in, inSz, confidenceThreshold,
                      alignmentToSquare, filterOutOfBounds);
}
||||
|
||||
// Forwards all the arguments, in the same order, to GParseYolo::on()
// and returns its result.
nn::parsers::GDetections parseYolo(const GMat& in,
                                   const GOpaque<Size>& inSz,
                                   const float confidenceThreshold,
                                   const float nmsThreshold,
                                   const std::vector<float>& anchors)
{
    using Kernel = nn::parsers::GParseYolo;
    return Kernel::on(in, inSz, confidenceThreshold, nmsThreshold, anchors);
}
||||
|
||||
} //namespace gapi
|
||||
} //namespace cv
|
@ -0,0 +1,338 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
#include "gnnparsers.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace gapi |
||||
{ |
||||
namespace nn |
||||
{ |
||||
class YoloParser |
||||
{ |
||||
public: |
||||
YoloParser(const float* out, const int side, const int lcoords, const int lclasses) |
||||
: m_out(out), m_side(side), m_lcoords(lcoords), m_lclasses(lclasses) |
||||
{} |
||||
|
||||
float scale(const int i, const int b) |
||||
{ |
||||
int obj_index = index(i, b, m_lcoords); |
||||
return m_out[obj_index]; |
||||
} |
||||
|
||||
double x(const int i, const int b) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
int col = i % m_side; |
||||
return (col + m_out[box_index]) / m_side; |
||||
} |
||||
|
||||
double y(const int i, const int b) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
int row = i / m_side; |
||||
return (row + m_out[box_index + m_side * m_side]) / m_side; |
||||
} |
||||
|
||||
double width(const int i, const int b, const float anchor) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
return std::exp(m_out[box_index + 2 * m_side * m_side]) * anchor / m_side; |
||||
} |
||||
|
||||
double height(const int i, const int b, const float anchor) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
return std::exp(m_out[box_index + 3 * m_side * m_side]) * anchor / m_side; |
||||
} |
||||
|
||||
float classConf(const int i, const int b, const int label) |
||||
{ |
||||
int class_index = index(i, b, m_lcoords + 1 + label); |
||||
return m_out[class_index]; |
||||
} |
||||
|
||||
cv::Rect toBox(const double x, const double y, const double h, const double w, const cv::Size& in_sz) |
||||
{ |
||||
auto h_scale = in_sz.height; |
||||
auto w_scale = in_sz.width; |
||||
cv::Rect r; |
||||
r.x = static_cast<int>((x - w / 2) * w_scale); |
||||
r.y = static_cast<int>((y - h / 2) * h_scale); |
||||
r.width = static_cast<int>(w * w_scale); |
||||
r.height = static_cast<int>(h * h_scale); |
||||
return r; |
||||
} |
||||
|
||||
private: |
||||
const float* m_out = nullptr; |
||||
int m_side = 0, m_lcoords = 0, m_lclasses = 0; |
||||
|
||||
int index(const int i, const int b, const int entry) |
||||
{ |
||||
return b * m_side * m_side * (m_lcoords + m_lclasses + 1) + entry * m_side * m_side + i; |
||||
} |
||||
}; |
||||
|
||||
// Fixed parameters of the Yolo output layout used by the parser.
struct YoloParams
{
    int num{5};     // number of boxes iterated per cell
    int coords{4};  // number of coordinate entries per box
};
||||
|
||||
struct Detection |
||||
{ |
||||
Detection(const cv::Rect& in_rect, const float in_conf, const int in_label) |
||||
: rect(in_rect), conf(in_conf), label(in_label) |
||||
{} |
||||
cv::Rect rect; |
||||
float conf = 0.0f; |
||||
int label = 0; |
||||
}; |
||||
|
||||
class SSDParser |
||||
{ |
||||
public: |
||||
SSDParser(const cv::MatSize& in_ssd_dims, const cv::Size& in_size, const float* data) |
||||
: m_dims(in_ssd_dims), m_maxProp(in_ssd_dims[2]), m_objSize(in_ssd_dims[3]), |
||||
m_data(data), m_surface(cv::Rect({0,0}, in_size)), m_size(in_size) |
||||
{ |
||||
GAPI_Assert(in_ssd_dims.dims() == 4u); // Fixed output layout
|
||||
GAPI_Assert(m_objSize == 7); // Fixed SSD object size
|
||||
} |
||||
|
||||
void adjustBoundingBox(cv::Rect& boundingBox) |
||||
{ |
||||
auto w = boundingBox.width; |
||||
auto h = boundingBox.height; |
||||
|
||||
boundingBox.x -= static_cast<int>(0.067 * w); |
||||
boundingBox.y -= static_cast<int>(0.028 * h); |
||||
|
||||
boundingBox.width += static_cast<int>(0.15 * w); |
||||
boundingBox.height += static_cast<int>(0.13 * h); |
||||
|
||||
if (boundingBox.width < boundingBox.height) |
||||
{ |
||||
auto dx = (boundingBox.height - boundingBox.width); |
||||
boundingBox.x -= dx / 2; |
||||
boundingBox.width += dx; |
||||
} |
||||
else |
||||
{ |
||||
auto dy = (boundingBox.width - boundingBox.height); |
||||
boundingBox.y -= dy / 2; |
||||
boundingBox.height += dy; |
||||
} |
||||
} |
||||
|
||||
std::tuple<cv::Rect, float, float, int> extract(const size_t step) |
||||
{ |
||||
const float* it = m_data + step * m_objSize; |
||||
float image_id = it[0]; |
||||
int label = static_cast<int>(it[1]); |
||||
float confidence = it[2]; |
||||
float rc_left = it[3]; |
||||
float rc_top = it[4]; |
||||
float rc_right = it[5]; |
||||
float rc_bottom = it[6]; |
||||
|
||||
cv::Rect rc; // Map relative coordinates to the original image scale
|
||||
rc.x = static_cast<int>(rc_left * m_size.width); |
||||
rc.y = static_cast<int>(rc_top * m_size.height); |
||||
rc.width = static_cast<int>(rc_right * m_size.width) - rc.x; |
||||
rc.height = static_cast<int>(rc_bottom * m_size.height) - rc.y; |
||||
return std::make_tuple(rc, image_id, confidence, label); |
||||
} |
||||
|
||||
int getMaxProposals() |
||||
{ |
||||
return m_maxProp; |
||||
} |
||||
|
||||
cv::Rect getSurface() |
||||
{ |
||||
return m_surface; |
||||
} |
||||
|
||||
private: |
||||
const cv::MatSize m_dims; |
||||
int m_maxProp = 0, m_objSize = 0; |
||||
const float* m_data = nullptr; |
||||
const cv::Rect m_surface; |
||||
const cv::Size m_size; |
||||
}; |
||||
} // namespace nn
|
||||
} // namespace gapi
|
||||
|
||||
// Parses SSD output into boxes and labels.
//
// Walks over the proposals until the first one with a negative image_id,
// keeps those whose confidence is not below confidence_threshold and whose
// label matches filter_label (any label when filter_label == -1), and stores
// every kept box clipped to the frame together with its label.
// The output vectors are cleared first.
void parseSSDBL(const cv::Mat& in_ssd_result,
                const cv::Size& in_size,
                const float confidence_threshold,
                const int filter_label,
                std::vector<cv::Rect>& out_boxes,
                std::vector<int>& out_labels)
{
    cv::gapi::nn::SSDParser parser(in_ssd_result.size, in_size, in_ssd_result.ptr<float>());
    out_boxes.clear();
    out_labels.clear();

    cv::Rect box;
    float image_id, conf;
    int label;
    const size_t proposals = parser.getMaxProposals();
    for (size_t idx = 0; idx < proposals; ++idx)
    {
        std::tie(box, image_id, conf, label) = parser.extract(idx);

        if (image_id < 0.f)
        {
            break;    // marks end-of-detections
        }

        const bool confident = !(conf < confidence_threshold);
        const bool wanted    = (filter_label == -1) || (label == filter_label);
        if (confident && wanted)
        {
            out_boxes.emplace_back(box & parser.getSurface());
            out_labels.emplace_back(label);
        }
    }
}
||||
|
||||
// Parses SSD output into boxes.
//
// Walks over the proposals until the first one with a negative image_id and
// skips those with confidence below confidence_threshold. A kept box is
// optionally adjusted to a square (alignment_to_square) and is then clipped
// to the frame; with filter_out_of_bounds set, a box whose area was changed
// by the clipping is dropped. The output vector is cleared first.
void parseSSD(const cv::Mat& in_ssd_result,
              const cv::Size& in_size,
              const float confidence_threshold,
              const bool alignment_to_square,
              const bool filter_out_of_bounds,
              std::vector<cv::Rect>& out_boxes)
{
    cv::gapi::nn::SSDParser parser(in_ssd_result.size, in_size, in_ssd_result.ptr<float>());
    out_boxes.clear();

    cv::Rect box;
    float image_id, conf;
    int label;
    const size_t proposals = parser.getMaxProposals();
    for (size_t idx = 0; idx < proposals; ++idx)
    {
        std::tie(box, image_id, conf, label) = parser.extract(idx);

        if (image_id < 0.f)
        {
            break;    // marks end-of-detections
        }
        if (conf < confidence_threshold)
        {
            continue; // skip objects with low confidence
        }

        if (alignment_to_square)
        {
            parser.adjustBoundingBox(box);
        }

        const auto visible = box & parser.getSurface();
        const bool out_of_frame = (visible.area() != box.area());
        if (filter_out_of_bounds && out_of_frame)
        {
            continue;
        }
        out_boxes.emplace_back(visible);
    }
}
||||
|
||||
// Parses Yolo output into boxes and labels.
//
// in_yolo_result       - network output, must have {1, 13, 13, N} dimensions
//                        with N = (num_classes + 5) * 5 and num_classes > 0;
//                        the data is read as float;
// in_size              - size of the frame the boxes are projected to;
// confidence_threshold - boxes with scale below it, and (box, class) pairs
//                        with scale * class confidence below it, are skipped;
// nms_threshold        - must be in (0, 1]; when < 1, a detection is dropped
//                        if its overlap (1 - Jaccard distance) with an already
//                        accepted box exceeds the threshold; when == 1, no
//                        suppression is done;
// anchors              - (width, height) anchor pairs, one pair per box;
//                        at least 2 * 5 values are required;
// out_boxes/out_labels - results, cleared first; ordered by descending
//                        confidence (stable for equal confidences).
void parseYolo(const cv::Mat& in_yolo_result,
               const cv::Size& in_size,
               const float confidence_threshold,
               const float nms_threshold,
               const std::vector<float>& anchors,
               std::vector<cv::Rect>& out_boxes,
               std::vector<int>& out_labels)
{
    const auto& dims = in_yolo_result.size;
    GAPI_Assert(dims.dims() == 4);
    GAPI_Assert(dims[0] == 1);
    GAPI_Assert(dims[1] == 13);
    GAPI_Assert(dims[2] == 13);
    GAPI_Assert(dims[3] % 5 == 0); // 5 boxes
    const auto num_classes = dims[3] / 5 - 5;
    GAPI_Assert(num_classes > 0);
    GAPI_Assert(0 < nms_threshold && nms_threshold <= 1);

    gapi::nn::YoloParams params;
    // The loop below reads anchors[2*b] and anchors[2*b + 1] for every box b,
    // so a shorter vector would be read out of bounds.
    GAPI_Assert(anchors.size() >= static_cast<size_t>(2 * params.num));

    out_boxes.clear();
    out_labels.clear();
    constexpr auto side = 13;
    constexpr auto side_square = side * side;
    const auto output = in_yolo_result.ptr<float>();

    gapi::nn::YoloParser parser(output, side, params.coords, num_classes);

    std::vector<gapi::nn::Detection> detections;

    for (int i = 0; i < side_square; ++i)
    {
        for (int b = 0; b < params.num; ++b)
        {
            const float scale = parser.scale(i, b);
            if (scale < confidence_threshold)
            {
                continue;
            }
            const double x = parser.x(i, b);
            const double y = parser.y(i, b);
            const double height = parser.height(i, b, anchors[2 * b + 1]);
            const double width  = parser.width(i, b, anchors[2 * b]);

            for (int label = 0; label < num_classes; ++label)
            {
                const float prob = scale * parser.classConf(i, b, label);
                if (prob < confidence_threshold)
                {
                    continue;
                }
                const auto box = parser.toBox(x, y, height, width, in_size);
                detections.emplace_back(box, prob, label);
            }
        }
    }

    // Most confident detections first; equal confidences keep scan order
    std::stable_sort(std::begin(detections), std::end(detections),
                     [](const gapi::nn::Detection& a, const gapi::nn::Detection& b)
                     {
                         return a.conf > b.conf;
                     });

    if (nms_threshold < 1.0f)
    {
        for (const auto& d : detections)
        {
            // Reject boxes which overlap with previously pushed ones
            // (they are sorted by confidence, so the rejected box
            // always has a smaller confidence)
            const bool is_new = std::none_of(std::begin(out_boxes), std::end(out_boxes),
                                             [&d, nms_threshold](const cv::Rect& r)
                                             {
                                                 const float rectOverlap =
                                                     1.f - static_cast<float>(jaccardDistance(r, d.rect));
                                                 return rectOverlap > nms_threshold;
                                             });
            if (is_new)
            {
                out_boxes. emplace_back(d.rect);
                out_labels.emplace_back(d.label);
            }
        }
    }
    else
    {
        // nms_threshold == 1: suppression is switched off, keep everything
        for (const auto& d : detections)
        {
            out_boxes. emplace_back(d.rect);
            out_labels.emplace_back(d.label);
        }
    }
}
||||
} // namespace cv
|
@ -0,0 +1,36 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
#include <opencv2/gapi/infer/parsers.hpp> |
||||
|
||||
#ifndef OPENCV_NNPARSERS_OCV_HPP |
||||
#define OPENCV_NNPARSERS_OCV_HPP |
||||
|
||||
namespace cv |
||||
{ |
||||
void parseSSDBL(const cv::Mat& in_ssd_result, |
||||
const cv::Size& in_size, |
||||
const float confidence_threshold, |
||||
const int filter_label, |
||||
std::vector<cv::Rect>& out_boxes, |
||||
std::vector<int>& out_labels); |
||||
|
||||
void parseSSD(const cv::Mat& in_ssd_result, |
||||
const cv::Size& in_size, |
||||
const float confidence_threshold, |
||||
const bool alignment_to_square, |
||||
const bool filter_out_of_bounds, |
||||
std::vector<cv::Rect>& out_boxes); |
||||
|
||||
void parseYolo(const cv::Mat& in_yolo_result, |
||||
const cv::Size& in_size, |
||||
const float confidence_threshold, |
||||
const float nms_threshold, |
||||
const std::vector<float>& anchors, |
||||
std::vector<cv::Rect>& out_boxes, |
||||
std::vector<int>& out_labels); |
||||
} |
||||
#endif // OPENCV_NNPARSERS_OCV_HPP
|
@ -0,0 +1,397 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
|
||||
|
||||
#ifndef OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP |
||||
#define OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP |
||||
|
||||
#include "gapi_tests_common.hpp" |
||||
#include "../../include/opencv2/gapi/infer/parsers.hpp" |
||||
|
||||
namespace opencv_test |
||||
{ |
||||
class ParserSSDTest |
||||
{ |
||||
public: |
||||
cv::Mat generateSSDoutput(const cv::Size& in_sz) |
||||
{ |
||||
constexpr int maxN = 200; |
||||
constexpr int objSize = 7; |
||||
std::vector<int> dims{ 1, 1, maxN, objSize }; |
||||
cv::Mat mat(dims, CV_32FC1); |
||||
auto data = mat.ptr<float>(); |
||||
|
||||
for (int i = 0; i < maxN; ++i) |
||||
{ |
||||
float* it = data + i * objSize; |
||||
auto ssdIt = generateItem(i, in_sz); |
||||
it[0] = ssdIt.image_id; |
||||
it[1] = ssdIt.label; |
||||
it[2] = ssdIt.confidence; |
||||
it[3] = ssdIt.rc_left; |
||||
it[4] = ssdIt.rc_top; |
||||
it[5] = ssdIt.rc_right; |
||||
it[6] = ssdIt.rc_bottom; |
||||
} |
||||
return mat; |
||||
} |
||||
|
||||
void parseSSDref(const cv::Mat& in_ssd_result, |
||||
const cv::Size& in_size, |
||||
const float confidence_threshold, |
||||
const bool alignment_to_square, |
||||
const bool filter_out_of_bounds, |
||||
std::vector<cv::Rect>& out_boxes) |
||||
{ |
||||
out_boxes.clear(); |
||||
const auto &in_ssd_dims = in_ssd_result.size; |
||||
CV_Assert(in_ssd_dims.dims() == 4u); |
||||
|
||||
const int MAX_PROPOSALS = in_ssd_dims[2]; |
||||
const int OBJECT_SIZE = in_ssd_dims[3]; |
||||
CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
|
||||
|
||||
const float *data = in_ssd_result.ptr<float>(); |
||||
cv::Rect surface({0,0}, in_size), rc; |
||||
float image_id, confidence; |
||||
int label; |
||||
for (int i = 0; i < MAX_PROPOSALS; ++i) |
||||
{ |
||||
std::tie(rc, image_id, confidence, label) |
||||
= extract(data + i*OBJECT_SIZE, in_size); |
||||
if (image_id < 0.f) |
||||
{ |
||||
break; // marks end-of-detections
|
||||
} |
||||
|
||||
if (confidence < confidence_threshold) |
||||
{ |
||||
continue; // skip objects with low confidence
|
||||
} |
||||
|
||||
if (alignment_to_square) |
||||
{ |
||||
adjustBoundingBox(rc); |
||||
} |
||||
|
||||
const auto clipped_rc = rc & surface; |
||||
if (filter_out_of_bounds) |
||||
{ |
||||
if (clipped_rc.area() != rc.area()) |
||||
{ |
||||
continue; |
||||
} |
||||
} |
||||
out_boxes.emplace_back(clipped_rc); |
||||
} |
||||
} |
||||
|
||||
void parseSSDBLref(const cv::Mat& in_ssd_result, |
||||
const cv::Size& in_size, |
||||
const float confidence_threshold, |
||||
const int filter_label, |
||||
std::vector<cv::Rect>& out_boxes, |
||||
std::vector<int>& out_labels) |
||||
{ |
||||
out_boxes.clear(); |
||||
out_labels.clear(); |
||||
const auto &in_ssd_dims = in_ssd_result.size; |
||||
CV_Assert(in_ssd_dims.dims() == 4u); |
||||
|
||||
const int MAX_PROPOSALS = in_ssd_dims[2]; |
||||
const int OBJECT_SIZE = in_ssd_dims[3]; |
||||
CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size
|
||||
cv::Rect surface({0,0}, in_size), rc; |
||||
float image_id, confidence; |
||||
int label; |
||||
const float *data = in_ssd_result.ptr<float>(); |
||||
for (int i = 0; i < MAX_PROPOSALS; i++) |
||||
{ |
||||
std::tie(rc, image_id, confidence, label) |
||||
= extract(data + i*OBJECT_SIZE, in_size); |
||||
if (image_id < 0.f) |
||||
{ |
||||
break; // marks end-of-detections
|
||||
} |
||||
|
||||
if (confidence < confidence_threshold || |
||||
(filter_label != -1 && label != filter_label)) |
||||
{ |
||||
continue; // filter out object classes if filter is specified
|
||||
} |
||||
|
||||
out_boxes.emplace_back(rc & surface); |
||||
out_labels.emplace_back(label); |
||||
} |
||||
} |
||||
|
||||
private: |
||||
void adjustBoundingBox(cv::Rect& boundingBox) |
||||
{ |
||||
auto w = boundingBox.width; |
||||
auto h = boundingBox.height; |
||||
|
||||
boundingBox.x -= static_cast<int>(0.067 * w); |
||||
boundingBox.y -= static_cast<int>(0.028 * h); |
||||
|
||||
boundingBox.width += static_cast<int>(0.15 * w); |
||||
boundingBox.height += static_cast<int>(0.13 * h); |
||||
|
||||
if (boundingBox.width < boundingBox.height) |
||||
{ |
||||
auto dx = (boundingBox.height - boundingBox.width); |
||||
boundingBox.x -= dx / 2; |
||||
boundingBox.width += dx; |
||||
} |
||||
else |
||||
{ |
||||
auto dy = (boundingBox.width - boundingBox.height); |
||||
boundingBox.y -= dy / 2; |
||||
boundingBox.height += dy; |
||||
} |
||||
} |
||||
|
||||
std::tuple<cv::Rect, float, float, int> extract(const float* it, |
||||
const cv::Size& in_size) |
||||
{ |
||||
float image_id = it[0]; |
||||
int label = static_cast<int>(it[1]); |
||||
float confidence = it[2]; |
||||
float rc_left = it[3]; |
||||
float rc_top = it[4]; |
||||
float rc_right = it[5]; |
||||
float rc_bottom = it[6]; |
||||
|
||||
cv::Rect rc; // map relative coordinates to the original image scale
|
||||
rc.x = static_cast<int>(rc_left * in_size.width); |
||||
rc.y = static_cast<int>(rc_top * in_size.height); |
||||
rc.width = static_cast<int>(rc_right * in_size.width) - rc.x; |
||||
rc.height = static_cast<int>(rc_bottom * in_size.height) - rc.y; |
||||
return std::make_tuple(rc, image_id, confidence, label); |
||||
} |
||||
|
||||
int randInRange(const int start, const int end) |
||||
{ |
||||
GAPI_Assert(start <= end); |
||||
return start + std::rand() % (end - start + 1); |
||||
} |
||||
|
||||
cv::Rect generateBox(const cv::Size& in_sz) |
||||
{ |
||||
// Generated rectangle can reside outside of the initial image by border pixels
|
||||
constexpr int border = 10; |
||||
constexpr int minW = 16; |
||||
constexpr int minH = 16; |
||||
cv::Rect box; |
||||
box.width = randInRange(minW, in_sz.width + 2*border); |
||||
box.height = randInRange(minH, in_sz.height + 2*border); |
||||
box.x = randInRange(-border, in_sz.width + border - box.width); |
||||
box.y = randInRange(-border, in_sz.height + border - box.height); |
||||
return box; |
||||
} |
||||
|
||||
struct SSDitem |
||||
{ |
||||
float image_id = 0.0f; |
||||
float label = 0.0f; |
||||
float confidence = 0.0f; |
||||
float rc_left = 0.0f; |
||||
float rc_top = 0.0f; |
||||
float rc_right = 0.0f; |
||||
float rc_bottom = 0.0f; |
||||
}; |
||||
|
||||
SSDitem generateItem(const int i, const cv::Size& in_sz) |
||||
{ |
||||
const auto normalize = [](int v, int range) { return static_cast<float>(v) / range; }; |
||||
|
||||
SSDitem it; |
||||
it.image_id = static_cast<float>(i); |
||||
it.label = static_cast<float>(randInRange(0, 9)); |
||||
it.confidence = static_cast<float>(std::rand()) / RAND_MAX; |
||||
auto box = generateBox(in_sz); |
||||
it.rc_left = normalize(box.x, in_sz.width); |
||||
it.rc_right = normalize(box.x + box.width, in_sz.width); |
||||
it.rc_top = normalize(box.y, in_sz.height); |
||||
it.rc_bottom = normalize(box.y + box.height, in_sz.height); |
||||
|
||||
return it; |
||||
} |
||||
}; |
||||
|
||||
class ParserYoloTest |
||||
{ |
||||
public: |
||||
cv::Mat generateYoloOutput(const int num_classes) |
||||
{ |
||||
std::vector<int> dims = { 1, 13, 13, (num_classes + 5) * 5 }; |
||||
cv::Mat mat(dims, CV_32FC1); |
||||
auto data = mat.ptr<float>(); |
||||
|
||||
const size_t range = dims[0] * dims[1] * dims[2] * dims[3]; |
||||
for (size_t i = 0; i < range; ++i) |
||||
{ |
||||
data[i] = static_cast<float>(std::rand()) / RAND_MAX; |
||||
} |
||||
return mat; |
||||
} |
||||
|
||||
void parseYoloRef(const cv::Mat& in_yolo_result, |
||||
const cv::Size& in_size, |
||||
const float confidence_threshold, |
||||
const float nms_threshold, |
||||
const int num_classes, |
||||
const std::vector<float>& anchors, |
||||
std::vector<cv::Rect>& out_boxes, |
||||
std::vector<int>& out_labels) |
||||
{ |
||||
YoloParams params; |
||||
constexpr auto side_square = 13 * 13; |
||||
this->m_out = in_yolo_result.ptr<float>(); |
||||
this->m_side = 13; |
||||
this->m_lcoords = params.coords; |
||||
this->m_lclasses = num_classes; |
||||
|
||||
std::vector<Detection> detections; |
||||
|
||||
for (int i = 0; i < side_square; ++i) |
||||
{ |
||||
for (int b = 0; b < params.num; ++b) |
||||
{ |
||||
float scale = this->scale(i, b); |
||||
if (scale < confidence_threshold) |
||||
{ |
||||
continue; |
||||
} |
||||
double x = this->x(i, b); |
||||
double y = this->y(i, b); |
||||
double height = this->height(i, b, anchors[2 * b + 1]); |
||||
double width = this->width(i, b, anchors[2 * b]); |
||||
|
||||
for (int label = 0; label < num_classes; ++label) |
||||
{ |
||||
float prob = scale * classConf(i,b,label); |
||||
if (prob < confidence_threshold) |
||||
{ |
||||
continue; |
||||
} |
||||
auto box = toBox(x, y, height, width, in_size); |
||||
detections.emplace_back(Detection(box, prob, label)); |
||||
} |
||||
} |
||||
} |
||||
std::stable_sort(std::begin(detections), std::end(detections), |
||||
[](const Detection& a, const Detection& b) |
||||
{ |
||||
return a.conf > b.conf; |
||||
}); |
||||
|
||||
if (nms_threshold < 1.0f) |
||||
{ |
||||
for (const auto& d : detections) |
||||
{ |
||||
if (std::end(out_boxes) == |
||||
std::find_if(std::begin(out_boxes), std::end(out_boxes), |
||||
[&d, nms_threshold](const cv::Rect& r) |
||||
{ |
||||
float rectOverlap = 1.f - static_cast<float>(jaccardDistance(r, d.rect)); |
||||
return rectOverlap > nms_threshold; |
||||
})) |
||||
{ |
||||
out_boxes. emplace_back(d.rect); |
||||
out_labels.emplace_back(d.label); |
||||
} |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
for (const auto& d: detections) |
||||
{ |
||||
out_boxes. emplace_back(d.rect); |
||||
out_labels.emplace_back(d.label); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private: |
||||
struct Detection |
||||
{ |
||||
Detection(const cv::Rect& in_rect, const float in_conf, const int in_label) |
||||
: rect(in_rect), conf(in_conf), label(in_label) |
||||
{} |
||||
cv::Rect rect; |
||||
float conf = 0.0f; |
||||
int label = 0; |
||||
}; |
||||
|
||||
struct YoloParams |
||||
{ |
||||
int num = 5; |
||||
int coords = 4; |
||||
}; |
||||
|
||||
float scale(const int i, const int b) |
||||
{ |
||||
int obj_index = index(i, b, m_lcoords); |
||||
return m_out[obj_index]; |
||||
} |
||||
|
||||
double x(const int i, const int b) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
int col = i % m_side; |
||||
return (col + m_out[box_index]) / m_side; |
||||
} |
||||
|
||||
double y(const int i, const int b) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
int row = i / m_side; |
||||
return (row + m_out[box_index + m_side * m_side]) / m_side; |
||||
} |
||||
|
||||
double width(const int i, const int b, const float anchor) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
return std::exp(m_out[box_index + 2 * m_side * m_side]) * anchor / m_side; |
||||
} |
||||
|
||||
double height(const int i, const int b, const float anchor) |
||||
{ |
||||
int box_index = index(i, b, 0); |
||||
return std::exp(m_out[box_index + 3 * m_side * m_side]) * anchor / m_side; |
||||
} |
||||
|
||||
float classConf(const int i, const int b, const int label) |
||||
{ |
||||
int class_index = index(i, b, m_lcoords + 1 + label); |
||||
return m_out[class_index]; |
||||
} |
||||
|
||||
cv::Rect toBox(const double x, const double y, const double h, const double w, const cv::Size& in_sz) |
||||
{ |
||||
auto h_scale = in_sz.height; |
||||
auto w_scale = in_sz.width; |
||||
cv::Rect r; |
||||
r.x = static_cast<int>((x - w / 2) * w_scale); |
||||
r.y = static_cast<int>((y - h / 2) * h_scale); |
||||
r.width = static_cast<int>(w * w_scale); |
||||
r.height = static_cast<int>(h * h_scale); |
||||
return r; |
||||
} |
||||
|
||||
int index(const int i, const int b, const int entry) |
||||
{ |
||||
return b * m_side * m_side * (m_lcoords + m_lclasses + 1) + entry * m_side * m_side + i; |
||||
} |
||||
|
||||
const float* m_out = nullptr; |
||||
int m_side = 0, m_lcoords = 0, m_lclasses = 0; |
||||
}; |
||||
|
||||
} // namespace opencv_test
|
||||
|
||||
#endif // OPENCV_GAPI_PARSERS_TESTS_COMMON_HPP
|
Loading…
Reference in new issue