Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1907 lines
81 KiB
1907 lines
81 KiB
// This file is part of OpenCV project. |
|
// It is subject to the license terms in the LICENSE file found in the top-level directory |
|
// of this distribution and at http://opencv.org/license.html. |
|
// |
|
// Copyright (C) 2018-2022 Intel Corporation |
|
|
|
#include "precomp.hpp" |
|
|
|
// needs to be included regardless if IE is present or not |
|
// (cv::gapi::ie::backend() is still there and is defined always) |
|
#include "backends/ie/giebackend.hpp" |
|
|
|
#ifdef HAVE_INF_ENGINE |
|
|
|
#if INF_ENGINE_RELEASE <= 2019010000 |
|
# error G-API IE module supports only OpenVINO IE >= 2019 R1 |
|
#endif |
|
|
|
#include <functional> |
|
#include <unordered_set> |
|
#include <atomic> |
|
#include <tuple> |
|
|
|
|
|
#include <ade/util/algorithm.hpp> |
|
|
|
#include <ade/util/range.hpp> |
|
#include <ade/util/zip_range.hpp> |
|
#include <ade/util/chain_range.hpp> |
|
#include <ade/typed_graph.hpp> |
|
|
|
#include <opencv2/core/utility.hpp> |
|
#include <opencv2/core/utils/logger.hpp> |
|
|
|
#include <opencv2/gapi/gcommon.hpp> |
|
#include <opencv2/gapi/garray.hpp> |
|
#include <opencv2/gapi/gopaque.hpp> |
|
#include <opencv2/gapi/util/any.hpp> |
|
#include <opencv2/gapi/gtype_traits.hpp> |
|
#include <opencv2/gapi/infer.hpp> |
|
#include <opencv2/gapi/own/convert.hpp> |
|
#include <opencv2/gapi/gframe.hpp> |
|
|
|
#include "compiler/gobjref.hpp" |
|
#include "compiler/gmodel.hpp" |
|
|
|
#include "backends/ie/util.hpp" |
|
#include "backends/ie/giebackend/giewrapper.hpp" |
|
|
|
#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! |
|
#include "logger.hpp" |
|
|
|
#if INF_ENGINE_RELEASE < 2021010000 |
|
#include "ie_compound_blob.h" |
|
#endif |
|
|
|
#if defined(HAVE_TBB) |
|
# include <tbb/concurrent_queue.h> // FIXME: drop it from here! |
|
template<typename T> using QueueClass = tbb::concurrent_bounded_queue<T>; |
|
#else |
|
# include "executor/conc_queue.hpp" |
|
template<typename T> using QueueClass = cv::gapi::own::concurrent_bounded_queue<T>; |
|
#endif // TBB |
|
|
|
#include "utils/itt.hpp" |
|
|
|
#include "streaming/onevpl/engine/preproc_engine_interface.hpp" |
|
#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" |
|
|
|
namespace IE = InferenceEngine; |
|
|
|
namespace { |
|
|
|
// Builds an IE::ROI from an OpenCV rectangle.
// The leading field is fixed to 0 (presumably the batch index - confirm
// against IE::ROI); the remaining fields are x, y, width and height cast
// to std::size_t.
inline IE::ROI toIE(const cv::Rect &rc) {
    const auto pos_x  = static_cast<std::size_t>(rc.x);
    const auto pos_y  = static_cast<std::size_t>(rc.y);
    const auto size_x = static_cast<std::size_t>(rc.width);
    const auto size_y = static_cast<std::size_t>(rc.height);
    return IE::ROI{0u, pos_x, pos_y, size_x, size_y};
}
|
|
|
// Converts OpenCV matrix dimensions into an IE::SizeVector
// (element type taken from IE::SizeVector itself).
inline IE::SizeVector toIE(const cv::MatSize &sz) {
    using DimT = IE::SizeVector::value_type;
    return cv::to_own<DimT>(sz);
}
|
// Converts IE dimensions into a vector of int; every element goes
// through ade::util::checked_cast<int>.
inline std::vector<int> toCV(const IE::SizeVector &vsz) {
    std::vector<int> dims;
    dims.reserve(vsz.size());
    for (std::size_t idx = 0; idx < vsz.size(); ++idx) {
        dims.push_back(ade::util::checked_cast<int>(vsz[idx]));
    }
    return dims;
}
|
|
|
// Picks a default IE layout for a tensor with the given number of dimensions.
// Asserts when ndims is outside the table below (i.e. ndims > 5).
inline IE::Layout toIELayout(const std::size_t ndims) {
    // Indexed by the number of dimensions: 0 -> SCALAR ... 5 -> NCDHW.
    static const IE::Layout lts[] = {
        IE::Layout::SCALAR, IE::Layout::C,    IE::Layout::NC,
        IE::Layout::CHW,    IE::Layout::NCHW, IE::Layout::NCDHW,
    };
    // FIXME: This is not really a good conversion,
    // since it may also stand for NHWC/HW/CN/NDHWC data
    CV_Assert(ndims < sizeof(lts) / sizeof(lts[0]));
    const IE::Layout picked = lts[ndims];
    return picked;
}
|
|
|
// Maps an OpenCV depth constant onto the matching IE precision.
// Asserts on any depth other than CV_8U / CV_32S / CV_32F / CV_16F.
inline IE::Precision toIE(int depth) {
    if (depth == CV_8U)  { return IE::Precision::U8;   }
    if (depth == CV_32S) { return IE::Precision::I32;  }
    if (depth == CV_32F) { return IE::Precision::FP32; }
    if (depth == CV_16F) { return IE::Precision::FP16; }

    GAPI_Assert(false && "IE. Unsupported data type");
    return IE::Precision::UNSPECIFIED;
}
|
// Maps an IE precision onto an OpenCV depth constant.
// I64 is reported as CV_32S, the same as I32.
// Asserts on any other precision.
inline int toCV(IE::Precision prec) {
    switch (prec) {
    case IE::Precision::U8:   return CV_8U;
    case IE::Precision::FP32: return CV_32F;
    case IE::Precision::FP16: return CV_16F;
    case IE::Precision::I32:  // both integer precisions share CV_32S
    case IE::Precision::I64:  return CV_32S;
    default:                  break;
    }
    GAPI_Assert(false && "IE. Unsupported data type");
    return -1;
}
|
|
|
// Describes a cv::Mat as an IE tensor.
// A 2D matrix hinted as IMAGE is described as a 1 x C x H x W tensor with an
// NHWC blocking descriptor built from the matrix row step; anything else is
// described by its own dimensions with the default layout from toIELayout().
inline IE::TensorDesc toIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) {
    const auto &sz = mat.size;

    // NB: For some reason RGB image is 2D image
    // (since channel component is not counted here).
    // Note: regular 2D vectors also fall into this category
    const bool treat_as_image = (sz.dims() == 2)
                             && (hint == cv::gapi::ie::TraitAs::IMAGE);
    if (!treat_as_image) {
        return IE::TensorDesc(toIE(mat.depth()), toIE(sz), toIELayout(sz.dims()));
    }

    // NB: This logic is mainly taken from IE samples
    const size_t channels = mat.channels();
    const size_t height   = mat.size().height;
    const size_t width    = mat.size().width;
    const size_t strideH  = mat.step1();

    IE::BlockingDesc bdesc({1, height, width, channels}                  /* blocking dims */,
                           {0, 2, 3, 1}                                  /* order for NHWC */,
                           0                                             /* offset */,
                           {0, 0, 0, 0}                                  /* offsets for dims */,
                           {strideH * height, strideH, channels, 1}      /* strides for dims */);

    return IE::TensorDesc(toIE(mat.depth()),
                          IE::SizeVector{1, channels, height, width},
                          bdesc);
}
|
|
|
// Wraps the memory of a cv::Mat into an IE blob (via make_shared_blob on the
// matrix data pointer). Asserts on unsupported depths.
inline IE::Blob::Ptr wrapIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) {
    const auto tensor_desc = toIE(mat, hint);
    const int  depth       = mat.depth();

    // NB: Seems there's no way to create an untyped (T-less) Blob::Ptr
    // in IE given only precision via TensorDesc. So every supported depth
    // is dispatched to its own typed make_shared_blob<>() call.
    if (depth == CV_8U) {
        return IE::make_shared_blob<uint8_t>(tensor_desc, const_cast<uint8_t*>(mat.ptr<uint8_t>()));
    }
    if (depth == CV_32F) {
        return IE::make_shared_blob<float>(tensor_desc, const_cast<float*>(mat.ptr<float>()));
    }
    if (depth == CV_32S) {
        return IE::make_shared_blob<int>(tensor_desc, const_cast<int*>(mat.ptr<int>()));
    }
    if (depth == CV_16F) {
        // 16-bit floats are wrapped as raw int16_t storage.
        return IE::make_shared_blob<int16_t>(tensor_desc, const_cast<int16_t*>(mat.ptr<int16_t>()));
    }

    GAPI_Assert(false && "IE. Unsupported data type");
    return IE::Blob::Ptr{};
}
|
|
|
// Wraps an accessed media frame view into an IE blob.
// BGR and GRAY frames are wrapped as single-plane images; NV12 frames are
// converted from their Y and UV planes by cv::gapi::ie::util::to_ie().
// Asserts on any other media format.
inline IE::Blob::Ptr wrapIE(const cv::MediaFrame::View& view,
                            const cv::GFrameDesc& desc) {
    const auto fmt = desc.fmt;

    if (fmt == cv::MediaFormat::BGR) {
        auto bgr = cv::Mat(desc.size, CV_8UC3, view.ptr[0], view.stride[0]);
        return wrapIE(bgr, cv::gapi::ie::TraitAs::IMAGE);
    }
    if (fmt == cv::MediaFormat::NV12) {
        // The UV plane is half the frame size with two interleaved channels.
        auto y_plane  = cv::Mat(desc.size,     CV_8UC1, view.ptr[0], view.stride[0]);
        auto uv_plane = cv::Mat(desc.size / 2, CV_8UC2, view.ptr[1], view.stride[1]);
        return cv::gapi::ie::util::to_ie(y_plane, uv_plane);
    }
    if (fmt == cv::MediaFormat::GRAY) {
        auto gray = cv::Mat(desc.size, CV_8UC1, view.ptr[0], view.stride[0]);
        return wrapIE(gray, cv::gapi::ie::TraitAs::IMAGE);
    }

    GAPI_Assert(false && "Unsupported media format for IE backend");
    GAPI_Assert(false);
}
|
|
|
template<class MatType> |
|
inline void copyFromIE(const IE::Blob::Ptr &blob, MatType &mat) { |
|
const auto& desc = blob->getTensorDesc(); |
|
const auto ie_type = toCV(desc.getPrecision()); |
|
if (ie_type != mat.type()) { |
|
std::stringstream ss; |
|
ss << "Failed while copying blob from IE to OCV: " |
|
<< "Blobs have different data types.\n" |
|
<< "IE type: " << ie_type << "\n" |
|
<< "OCV type: " << mat.type() << std::endl; |
|
throw std::logic_error(ss.str()); |
|
} |
|
switch (blob->getTensorDesc().getPrecision()) { |
|
#define HANDLE(E,T) \ |
|
case IE::Precision::E: std::copy_n(blob->buffer().as<T*>(), \ |
|
mat.total(), \ |
|
reinterpret_cast<T*>(mat.data)); \ |
|
break; |
|
HANDLE(U8, uint8_t); |
|
HANDLE(FP32, float); |
|
HANDLE(I32, int); |
|
HANDLE(FP16, cv::float16_t); |
|
#undef HANDLE |
|
case IE::Precision::I64: { |
|
GAPI_LOG_WARNING(NULL, "INT64 isn't supported for cv::Mat. Conversion to INT32 is used."); |
|
cv::gimpl::convertInt64ToInt32(blob->buffer().as<int64_t*>(), |
|
reinterpret_cast<int*>(mat.data), |
|
mat.total()); |
|
break; |
|
} |
|
default: GAPI_Assert(false && "IE. Unsupported data type"); |
|
} |
|
} |
|
|
|
// Verifies that every requested layer name is a key of network_map.
// On the first missing name throws std::logic_error whose message names the
// missing layer and lists all layers of the network together with their
// precision and layout. layer_type ("input"/"output") is used in the message.
template <typename MapT>
void checkLayerNames(const MapT&                     network_map,
                     const std::vector<std::string>& layer_names,
                     const std::string&              layer_type) {
    for (const auto& requested : layer_names) {
        if (network_map.find(requested) != network_map.end()) {
            continue;
        }

        std::stringstream report;
        report << "Failed to find " << layer_type << " layer with name: "
               << "\"" << requested << "\"" << '\n';
        report << "Network " << layer_type << " layers: " << '\n';
        for (const auto& entry : network_map) {
            const auto& tensor_desc = entry.second->getTensorDesc();
            report << entry.first << " : " << tensor_desc.getPrecision()
                   << " / " << tensor_desc.getLayout() << '\n';
        }
        throw std::logic_error(report.str());
    }
}
|
|
|
// Validates the requested input layer names against the network inputs;
// errors are reported by checkLayerNames() with the "input" tag.
template <typename MapT>
void checkInputLayerNames(const MapT&                     network_map,
                          const std::vector<std::string>& layer_names) {
    const std::string layer_kind("input");
    checkLayerNames(network_map, layer_names, layer_kind);
}
|
|
|
// Validates the requested output layer names against the network outputs;
// errors are reported by checkLayerNames() with the "output" tag.
template <typename MapT>
void checkOutputLayerNames(const MapT&                     network_map,
                           const std::vector<std::string>& layer_names) {
    const std::string layer_kind("output");
    checkLayerNames(network_map, layer_names, layer_kind);
}
|
|
|
// IE-specific metadata, represents a network with its parameters |
|
struct IEUnit { |
|
static const char *name() { return "IEModelConfig"; } |
|
|
|
cv::gapi::ie::detail::ParamDesc params; |
|
IE::CNNNetwork net; |
|
|
|
IE::ExecutableNetwork this_network; |
|
cv::gimpl::ie::wrap::Plugin this_plugin; |
|
|
|
InferenceEngine::RemoteContext::Ptr rctx = nullptr; |
|
|
|
std::shared_ptr<cv::gapi::wip::IPreprocEngine> preproc_engine_impl; |
|
|
|
// FIXME: Unlike loadNetwork case, importNetwork requires that preprocessing |
|
// should be passed as ExecutableNetwork::SetBlob method, so need to collect |
|
// and store this information at the graph compilation stage (outMeta) and use in runtime. |
|
using PreProcMap = std::unordered_map<std::string, IE::PreProcessInfo>; |
|
PreProcMap preproc_map; |
|
|
|
// NEW FIXME: Need to aggregate getInputInfo & GetInputInfo from network |
|
// into generic wrapper and invoke it at once in single place instead of |
|
// analyzing ParamDesc::Kind::Load/Import every time when we need to get access |
|
// for network info. |
|
// In term of introducing custom VPP/VPL preprocessing functionality |
|
// It was decided to use GFrameDesc as such aggregated network info with limitation |
|
// that VPP/VPL produces cv::MediaFrame only. But it should be not considered as |
|
// final solution |
|
class InputFramesDesc { |
|
using input_name_type = std::string; |
|
using description_type = cv::GFrameDesc; |
|
std::map<input_name_type, description_type> map; |
|
public: |
|
static bool is_applicable(const cv::GMetaArg &mm); |
|
const description_type &get_param(const input_name_type &input) const; |
|
|
|
void set_param(const input_name_type &input, |
|
const IE::TensorDesc& desc); |
|
}; |
|
|
|
InputFramesDesc net_input_params; |
|
|
|
explicit IEUnit(const cv::gapi::ie::detail::ParamDesc &pp) |
|
: params(pp) { |
|
InferenceEngine::ParamMap* ctx_params = |
|
cv::util::any_cast<InferenceEngine::ParamMap>(¶ms.context_config); |
|
if (ctx_params != nullptr) { |
|
auto ie_core = cv::gimpl::ie::wrap::getCore(); |
|
GAPI_LOG_DEBUG(nullptr, "create IE remote ctx for device id: " << params.device_id); |
|
rctx = ie_core.CreateContext(params.device_id, *ctx_params); |
|
} |
|
|
|
if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
net = cv::gimpl::ie::wrap::readNetwork(params); |
|
// NB: Set batch size only if user asked. (don't set by default) |
|
if (params.batch_size.has_value()) { |
|
net.setBatchSize(params.batch_size.value()); |
|
} |
|
} else if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import) { |
|
this_plugin = cv::gimpl::ie::wrap::getPlugin(params); |
|
this_network = cv::gimpl::ie::wrap::importNetwork(this_plugin, params, rctx); |
|
if (!params.reshape_table.empty() || !params.layer_names_to_reshape.empty()) { |
|
GAPI_LOG_WARNING(NULL, "Reshape isn't supported for imported network"); |
|
} |
|
} else { |
|
cv::util::throw_error(std::logic_error("Unsupported ParamDesc::Kind")); |
|
} |
|
|
|
// The practice shows that not all inputs and not all outputs |
|
// are mandatory to specify in IE model. |
|
// So what we're concerned here about is: |
|
// if operation's (not topology's) input/output number is |
|
// greater than 1, then we do care about input/output layer |
|
// names. Otherwise, names are picked up automatically. |
|
// TODO: Probably this check could be done at the API entry point? (gnet) |
|
if (params.num_in > 1u && params.num_in != params.input_names.size()) { |
|
cv::util::throw_error(std::logic_error("Please specify input layer names for " |
|
+ params.model_path)); |
|
} |
|
if (params.num_out > 1u && params.num_out != params.output_names.size()) { |
|
cv::util::throw_error(std::logic_error("Please specify output layer names for " |
|
+ params.model_path)); |
|
} |
|
if (params.num_in == 1u && params.input_names.empty()) { |
|
if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
params.input_names = { net.getInputsInfo().begin()->first }; |
|
} else { |
|
params.input_names = { this_network.GetInputsInfo().begin()->first }; |
|
} |
|
} |
|
if (params.num_out == 1u && params.output_names.empty()) { |
|
if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
params.output_names = { net.getOutputsInfo().begin()->first }; |
|
} else { |
|
params.output_names = { this_network.GetOutputsInfo().begin()->first }; |
|
} |
|
} |
|
if (!params.reshape_table.empty()) { |
|
GAPI_Assert((params.reshape_table.size() + params.layer_names_to_reshape.size()) <= |
|
params.num_in && |
|
"Number of layers to reshape must be less than or equal to number of inputs"); |
|
} |
|
|
|
if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
checkInputLayerNames(net.getInputsInfo(), params.input_names); |
|
checkOutputLayerNames(net.getOutputsInfo(), params.output_names); |
|
} else if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import) { |
|
checkInputLayerNames(this_network.GetInputsInfo(), params.input_names); |
|
checkOutputLayerNames(this_network.GetOutputsInfo(), params.output_names); |
|
} else { |
|
cv::util::throw_error(std::logic_error("Unsupported ParamDesc::Kind")); |
|
} |
|
|
|
if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import && |
|
!cv::util::holds_alternative<cv::util::monostate>(params.output_precision)) { |
|
cv::util::throw_error( |
|
std::logic_error("Setting output precision isn't supported for imported network")); |
|
} |
|
|
|
|
|
using namespace cv::gapi::wip::onevpl; |
|
if (params.vpl_preproc_device.has_value() && params.vpl_preproc_ctx.has_value()) { |
|
using namespace cv::gapi::wip; |
|
GAPI_LOG_INFO(nullptr, "VPP preproc creation requested"); |
|
preproc_engine_impl = |
|
IPreprocEngine::create_preproc_engine<onevpl::VPPPreprocDispatcher>( |
|
params.vpl_preproc_device.value(), |
|
params.vpl_preproc_ctx.value()); |
|
GAPI_LOG_INFO(nullptr, "VPP preproc created successfuly"); |
|
} |
|
} |
|
|
|
// This method is [supposed to be] called at Island compilation stage |
|
cv::gimpl::ie::IECompiled compile() const { |
|
IEUnit* non_const_this = const_cast<IEUnit*>(this); |
|
// FIXME: LoadNetwork must be called only after all necessary model |
|
// inputs information is set, since it's done in outMeta and compile called after that, |
|
// this place seems to be suitable, but consider another place not to break const agreements. |
|
if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
non_const_this->this_plugin = cv::gimpl::ie::wrap::getPlugin(params); |
|
non_const_this->this_network = cv::gimpl::ie::wrap::loadNetwork(non_const_this->this_plugin, |
|
net, params, rctx); |
|
} |
|
|
|
return {params, this_plugin, this_network}; |
|
} |
|
}; |
|
|
|
// True when the given meta argument currently holds a cv::GFrameDesc.
bool IEUnit::InputFramesDesc::is_applicable(const cv::GMetaArg &mm) {
    const bool holds_frame_desc = cv::util::holds_alternative<cv::GFrameDesc>(mm);
    return holds_frame_desc;
}
|
|
|
// Returns the frame description registered for the given input name.
// Asserts if no description was stored for that name.
const IEUnit::InputFramesDesc::description_type &
IEUnit::InputFramesDesc::get_param(const input_name_type &input) const {
    const auto it = map.find(input);
    GAPI_Assert(it != map.end() && "No appropriate input is found in InputFramesDesc");
    const description_type &found = it->second;
    return found;
}
|
|
|
void IEUnit::InputFramesDesc::set_param(const input_name_type &input, |
|
const IE::TensorDesc& desc) { |
|
description_type ret; |
|
ret.fmt = cv::MediaFormat::NV12; |
|
const InferenceEngine::SizeVector& inDims = desc.getDims(); |
|
auto layout = desc.getLayout(); |
|
GAPI_LOG_DEBUG(nullptr, "network input: " << input << |
|
", tensor dims: " << inDims[0] << ", " << inDims[1] << |
|
", " << inDims[2] << ", " << inDims[3]); |
|
if (layout != InferenceEngine::NHWC && layout != InferenceEngine::NCHW) { |
|
GAPI_LOG_WARNING(nullptr, "Unsupported layout for VPP preproc: " << layout << |
|
", input name: " << input); |
|
GAPI_Assert(false && "Unsupported layout for VPP preproc"); |
|
} |
|
GAPI_Assert(inDims.size() == 4u); |
|
ret.size.width = static_cast<int>(inDims[3]); |
|
ret.size.height = static_cast<int>(inDims[2]); |
|
|
|
auto res = map.emplace(input, ret); |
|
GAPI_Assert(res.second && "Duplicated input info in InputFramesDesc are not allowable"); |
|
} |
|
|
|
class IECallContext |
|
{ |
|
public: |
|
IECallContext(const IEUnit & unit, |
|
cv::gimpl::GIslandExecutable::IOutput & output, |
|
const cv::GArgs & args, |
|
const std::vector<cv::gimpl::RcDesc> & outs, |
|
std::vector<cv::gimpl::GIslandExecutable::InObj> && input_objs, |
|
std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs); |
|
|
|
const cv::GArgs& inArgs() const; |
|
|
|
// Generic accessor API |
|
template<typename T> |
|
const T& inArg(std::size_t input) const { |
|
return m_args.at(input).get<T>(); |
|
} |
|
|
|
template<typename T> |
|
std::vector<T>& outVecR(std::size_t output) { |
|
return outVecRef(output).wref<T>(); |
|
} |
|
|
|
// Syntax sugar |
|
cv::GShape inShape(std::size_t input) const; |
|
const cv::Mat& inMat (std::size_t input) const; |
|
const cv::MediaFrame& inFrame(std::size_t input) const; |
|
|
|
const cv::GRunArg& input (std::size_t idx) const; |
|
cv::GRunArgP output (std::size_t idx); |
|
cv::Mat& outMatR(std::size_t idx); |
|
|
|
const IEUnit &uu; |
|
cv::gimpl::GIslandExecutable::IOutput &out; |
|
|
|
// NB: Need to guarantee that MediaFrame::View doesn't die until request is over. |
|
using Views = std::vector<std::unique_ptr<cv::MediaFrame::View>>; |
|
Views views; |
|
|
|
// To store exception appeared in callback. |
|
std::exception_ptr eptr; |
|
|
|
using req_key_t = void*; |
|
cv::MediaFrame* prepareKeepAliveFrameSlot(req_key_t key); |
|
size_t releaseKeepAliveFrame(req_key_t key); |
|
private: |
|
cv::detail::VectorRef& outVecRef(std::size_t idx); |
|
|
|
cv::GArg packArg(const cv::GArg &arg); |
|
|
|
// To store input/output data from frames |
|
std::vector<cv::gimpl::GIslandExecutable::InObj> m_input_objs; |
|
std::vector<cv::gimpl::GIslandExecutable::OutObj> m_output_objs; |
|
|
|
// To simplify access to cv::Mat inside cv::RMat |
|
cv::gimpl::Mag m_res; |
|
|
|
// FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call |
|
//to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run, |
|
//once on enter for input and output arguments, and once before return for output arguments only |
|
// FIXME: check if the above applies to this backend (taken from CPU) |
|
std::unordered_map<std::size_t, cv::GRunArgP> m_results; |
|
|
|
// Input parameters passed to an inference operation. |
|
cv::GArgs m_args; |
|
cv::GShapes m_in_shapes; |
|
|
|
// keep alive preprocessed frames |
|
std::mutex keep_alive_frames_mutex; |
|
std::unordered_map<req_key_t, cv::MediaFrame> keep_alive_pp_frames; |
|
}; |
|
|
|
// Binds the island input/output objects to the internal magazine, packs the
// operation arguments, records their shapes and resolves a pointer for every
// output port.
IECallContext::IECallContext(const IEUnit                                      & unit,
                             cv::gimpl::GIslandExecutable::IOutput             & output,
                             const cv::GArgs                                   & args,
                             const std::vector<cv::gimpl::RcDesc>              & outs,
                             std::vector<cv::gimpl::GIslandExecutable::InObj>  && input_objs,
                             std::vector<cv::gimpl::GIslandExecutable::OutObj> && output_objs)
    : uu(unit)
    , out(output)
    , m_input_objs(std::move(input_objs))
    , m_output_objs(std::move(output_objs))
{
    for (auto& in_obj : m_input_objs) {
        cv::gimpl::magazine::bindInArg(m_res, in_obj.first, in_obj.second);
    }
    for (auto& out_obj : m_output_objs) {
        cv::gimpl::magazine::bindOutArg(m_res, out_obj.first, out_obj.second);
    }

    // First pack every argument, only then collect the shapes.
    m_args.reserve(args.size());
    for (const auto& arg : args) {
        m_args.push_back(packArg(arg));
    }
    for (const auto& arg : args) {
        m_in_shapes.push_back(arg.get<cv::gimpl::RcDesc>().shape);
    }

    // FIXME: Can the same GArg type resolution mechanism be reused here?
    for (std::size_t port = 0; port < outs.size(); ++port) {
        const auto desc = outs[port];
        m_results[port] = cv::gimpl::magazine::getObjPtr(m_res, desc);
    }
}
|
|
|
// Read-only access to all packed input arguments.
const cv::GArgs& IECallContext::inArgs() const {
    const cv::GArgs &packed = this->m_args;
    return packed;
}
|
|
|
// Shape of the i-th input argument (no bounds check is performed).
cv::GShape IECallContext::inShape(std::size_t i) const {
    const cv::GShape shape = m_in_shapes[i];
    return shape;
}
|
|
|
// Input argument interpreted as cv::Mat.
const cv::Mat& IECallContext::inMat(std::size_t input) const {
    return this->inArg<cv::Mat>(input);
}
|
|
|
// Input argument interpreted as cv::MediaFrame.
const cv::MediaFrame& IECallContext::inFrame(std::size_t input) const {
    return this->inArg<cv::MediaFrame>(input);
}
|
|
|
// Writable cv::Mat behind the output with the given index
// (the index lookup is bounds-checked via .at()).
cv::Mat& IECallContext::outMatR(std::size_t idx) {
    cv::Mat *mat_ptr = cv::util::get<cv::Mat*>(m_results.at(idx));
    return *mat_ptr;
}
|
|
|
// Island output object with the given index (no bounds check is performed).
cv::GRunArgP IECallContext::output(std::size_t idx) {
    auto &out_obj = m_output_objs[idx];
    return out_obj.second;
}
|
|
|
// Island input object with the given index (no bounds check is performed).
const cv::GRunArg& IECallContext::input(std::size_t idx) const {
    const auto &in_obj = m_input_objs[idx];
    return in_obj.second;
}
|
|
|
// VectorRef stored for the output with the given index
// (the index lookup is bounds-checked via .at()).
cv::detail::VectorRef& IECallContext::outVecRef(std::size_t idx) {
    auto &result_slot = m_results.at(idx);
    return cv::util::get<cv::detail::VectorRef>(result_slot);
}
|
|
|
// Replaces a graph object reference with the actual object stored in the
// magazine. Throws std::logic_error for non-reference arguments and for
// unsupported shapes.
cv::GArg IECallContext::packArg(const cv::GArg &arg) {
    // No API placeholders allowed at this point
    // FIXME: this check has to be done somewhere in compilation stage.
    GAPI_Assert(   arg.kind != cv::detail::ArgKind::GMAT
                && arg.kind != cv::detail::ArgKind::GSCALAR
                && arg.kind != cv::detail::ArgKind::GARRAY);

    const bool is_obj_ref = (arg.kind == cv::detail::ArgKind::GOBJREF);
    if (!is_obj_ref) {
        cv::util::throw_error(std::logic_error("Inference supports G-types ONLY!"));
    }
    GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF);

    // Wrap associated CPU object (either host or an internal one)
    // FIXME: object can be moved out!!! GExecutor faced that.
    const cv::gimpl::RcDesc &ref = arg.get<cv::gimpl::RcDesc>();
    const auto shape = ref.shape;

    if (shape == cv::GShape::GMAT) {
        return cv::GArg(m_res.slot<cv::Mat>()[ref.id]);
    }
    // Note: .at() is intentional for GArray, GOpaque (and is used for GFrame
    // too) as the object MUST be already there
    // (and constructed by either bindIn/Out or resetInternal)
    if (shape == cv::GShape::GARRAY) {
        return cv::GArg(m_res.slot<cv::detail::VectorRef>().at(ref.id));
    }
    if (shape == cv::GShape::GOPAQUE) {
        return cv::GArg(m_res.slot<cv::detail::OpaqueRef>().at(ref.id));
    }
    if (shape == cv::GShape::GFRAME) {
        return cv::GArg(m_res.slot<cv::MediaFrame>().at(ref.id));
    }

    cv::util::throw_error(std::logic_error("Unsupported GShape type"));
}
|
|
|
// Returns the address of the keep-alive slot for the given key, creating
// the slot if it doesn't exist yet. The map access is done under the mutex.
cv::MediaFrame* IECallContext::prepareKeepAliveFrameSlot(req_key_t key) {
    std::lock_guard<std::mutex> guard(keep_alive_frames_mutex);
    cv::MediaFrame &slot = keep_alive_pp_frames[key];
    return &slot;
}
|
|
|
// Drops the frame previously captured via prepareKeepAliveFrameSlot().
//
// The frame is captured to keep a reference on the inner media adapter so
// that the frame resource stays locked until inference is done; otherwise
// a decoder could take it back from the resource pool as a free one.
// The slot itself is not erased: the stored frame is overwritten by an
// empty cv::MediaFrame. Returns the number of slots in the storage.
size_t IECallContext::releaseKeepAliveFrame(req_key_t key) {
    void  *released_slot  = nullptr;
    size_t reserved_count = 0;
    {
        std::lock_guard<std::mutex> guard(keep_alive_frames_mutex);
        const auto found = keep_alive_pp_frames.find(key);
        if (found != keep_alive_pp_frames.end()) {
            released_slot = &found->second;
            found->second = cv::MediaFrame();
        }
        reserved_count = keep_alive_pp_frames.size();
    }
    // The slot address is used for logging only.
    cv::util::suppress_unused_warning(released_slot);
    GAPI_LOG_DEBUG(nullptr, "Release keep alive frame, slot: " << released_slot <<
                            ", reserved frames count: " << reserved_count);
    return reserved_count;
}
|
|
|
struct IECallable { |
|
static const char *name() { return "IERequestCallable"; } |
|
using Run = std::function<void(std::shared_ptr<IECallContext>, cv::gimpl::ie::RequestPool&)>; |
|
Run run; |
|
}; |
|
|
|
struct KImpl { |
|
cv::gimpl::CustomMetaFunction::CM customMetaFunc; |
|
IECallable::Run run; |
|
}; |
|
|
|
// FIXME: Is there a way to take a typed graph (our GModel), |
|
// and create a new typed graph _ATOP_ of that (by extending with a couple of |
|
// new types?). |
|
// Alternatively, is there a way to compose types graphs? |
|
// |
|
// If not, we need to introduce that! |
|
using GIEModel = ade::TypedGraph |
|
< cv::gimpl::Protocol |
|
, cv::gimpl::Op |
|
, cv::gimpl::NetworkParams |
|
, cv::gimpl::CustomMetaFunction |
|
, IEUnit |
|
, IECallable |
|
>; |
|
|
|
// FIXME: Same issue with Typed and ConstTyped |
|
using GConstGIEModel = ade::ConstTypedGraph |
|
< cv::gimpl::Protocol |
|
, cv::gimpl::Op |
|
, cv::gimpl::NetworkParams |
|
, cv::gimpl::CustomMetaFunction |
|
, IEUnit |
|
, IECallable |
|
>; |
|
|
|
// Runs the unit's preprocessing engine on the frame if the engine reports the
// frame as applicable; otherwise the frame is returned untouched.
//
// in_frame             - frame to preprocess (consumed).
// layer_name           - network input whose registered description is the
//                        preprocessing target.
// ctx                  - call context; ctx.uu.preproc_engine_impl is
//                        dereferenced without a null check here.
// opt_roi              - optional ROI forwarded to the engine.
// out_keep_alive_frame - if non-null, receives a copy of the preprocessed frame.
// out_is_preprocessed  - if non-null, set to true when preprocessing happened
//                        (left untouched otherwise).
cv::MediaFrame preprocess_frame_impl(cv::MediaFrame &&in_frame, const std::string &layer_name,
                                     IECallContext& ctx,
                                     const cv::util::optional<cv::Rect> &opt_roi,
                                     cv::MediaFrame* out_keep_alive_frame,
                                     bool* out_is_preprocessed) {
    const auto &unit = ctx.uu;

    cv::util::optional<cv::gapi::wip::pp_params> param =
        unit.preproc_engine_impl->is_applicable(in_frame);
    if (!param.has_value()) {
        // Not a suitable frame: check on other preproc backend or rely on IE plugin
        return std::move(in_frame);
    }

    GAPI_LOG_DEBUG(nullptr, "VPP preprocessing for decoded remote frame will be used");
    cv::GFrameDesc expected_net_input_descr =
        unit.net_input_params.get_param(layer_name);

    // TODO: Find a better place to configure media format for GPU
    // adjust color conversion to NV12 according to OV GPU limitation
    const bool is_remote_gpu =
        unit.params.device_id.find("GPU") != std::string::npos && unit.rctx;
    if (is_remote_gpu) {
        const auto cfg_it = unit.params.config.find(std::string("GPU_NV12_TWO_INPUTS"));
        if (cfg_it != unit.params.config.end() && cfg_it->second == "YES") {
            GAPI_LOG_DEBUG(nullptr, "Adjust preprocessing GPU media format to NV12");
            expected_net_input_descr.fmt = cv::MediaFormat::NV12;
        }
    }

    cv::gapi::wip::pp_session pp_sess =
        unit.preproc_engine_impl->initialize_preproc(param.value(),
                                                     expected_net_input_descr);

    in_frame = unit.preproc_engine_impl->run_sync(pp_sess, in_frame, opt_roi);

    if (out_keep_alive_frame != nullptr) {
        GAPI_LOG_DEBUG(nullptr, "remember preprocessed remote frame to keep it busy from reuse, slot: " <<
                                out_keep_alive_frame);
        *out_keep_alive_frame = in_frame;
    }
    if (out_is_preprocessed != nullptr) {
        *out_is_preprocessed = true;
    }
    return std::move(in_frame);
}
|
|
|
// Produces an IE blob for the i-th input of the call context.
//
// GFRAME inputs: the frame is optionally passed through the unit's
// preprocessing engine. Then, if a remote context exists, an NV12 blob is
// created from the frame's blob parameters via that context; otherwise the
// frame is accessed for reading (the view is stored in ctx.views) and wrapped.
// GMAT inputs: the matrix is wrapped using the given hint.
// Any other input shape triggers an assertion.
//
// layer_name, opt_roi, out_keep_alive_frame and out_is_preprocessed are only
// forwarded to preprocess_frame_impl().
inline IE::Blob::Ptr extractBlob(IECallContext& ctx,
                                 std::size_t i,
                                 cv::gapi::ie::TraitAs hint,
                                 const std::string& layer_name,
                                 const cv::util::optional<cv::Rect> &opt_roi,
                                 cv::MediaFrame* out_keep_alive_frame = nullptr,
                                 bool* out_is_preprocessed = nullptr) {
    switch (ctx.inShape(i)) {
        case cv::GShape::GFRAME: {
            // A local copy is taken since the frame may be replaced
            // by its preprocessed version below.
            auto frame = ctx.inFrame(i);
            if (ctx.uu.preproc_engine_impl) {
                GAPI_LOG_DEBUG(nullptr, "Try to use preprocessing for decoded frame in local ctx");
                frame = preprocess_frame_impl(std::move(frame), layer_name, ctx, opt_roi,
                                              out_keep_alive_frame, out_is_preprocessed);
            }

            // NB: check OV remote device context availability.
            // if it exist and MediaFrame shares the same device context
            // then we create a remote blob without memory copy
            if (ctx.uu.rctx != nullptr) {
                // Request params for result frame whatever it got preprocessed or not
                cv::util::any any_blob_params = frame.blobParams();
                using ParamType = std::pair<InferenceEngine::TensorDesc, InferenceEngine::ParamMap>;
                using NV12ParamType = std::pair<ParamType, ParamType>;

                NV12ParamType* blob_params = cv::util::any_cast<NV12ParamType>(&any_blob_params);
                if (blob_params == nullptr) {
                    GAPI_Assert(false && "Incorrect type of blobParams:"
                                         "expected std::pair<ParamType, ParamType>,"
                                         "with ParamType std::pair<InferenceEngine::TensorDesc,"
                                         "InferenceEngine::ParamMap >>");
                }

                //The parameters are TensorDesc and ParamMap for both y and uv blobs
                auto y_blob = ctx.uu.rctx->CreateBlob(blob_params->first.first, blob_params->first.second);
                auto uv_blob = ctx.uu.rctx->CreateBlob(blob_params->second.first, blob_params->second.second);

#if INF_ENGINE_RELEASE >= 2021010000
                return IE::make_shared_blob<IE::NV12Blob>(y_blob, uv_blob);
#else
                return IE::make_shared_blob<InferenceEngine::NV12Blob>(y_blob, uv_blob);
#endif
            }

            // NB: If no OV remote context created then use default MediaFrame accessor approach:
            // it invokes memory copying operation If GPU MediaFrame come
            ctx.views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R)));
            return wrapIE(*(ctx.views.back()), frame.desc());
        }
        case cv::GShape::GMAT: {
            return wrapIE(ctx.inMat(i), hint);
        }
        default:
            // NB: `false &&` is required: a bare string literal is always
            // true, so the assertion (and its message) would never fire.
            GAPI_Assert(false && "Unsupported input shape for IE backend");
    }
    GAPI_Assert(false);
}
|
|
|
|
|
// Assigns a blob to the given layer of an infer request.
// For loaded networks the blob is set as is; for imported networks the
// preprocessing info stored in the unit's preproc_map for this layer is
// passed along (preproc_map.at() throws if the layer has no entry).
static void setBlob(InferenceEngine::InferRequest& req,
                    const std::string&             layer_name,
                    const IE::Blob::Ptr&           blob,
                    const IECallContext&           ctx) {
    // TODO: Ideally we shouldn't do SetBlob() but GetBlob() instead,
    // and redirect our data producers to this memory
    // (A memory dialog comes to the picture again)
    using namespace cv::gapi::ie::detail;

    const bool is_loaded = (ctx.uu.params.kind == ParamDesc::Kind::Load);
    if (is_loaded) {
        req.SetBlob(layer_name, blob);
        return;
    }

    GAPI_Assert(ctx.uu.params.kind == ParamDesc::Kind::Import);
    const auto &preproc_info = ctx.uu.preproc_map.at(layer_name);
    req.SetBlob(layer_name, blob, preproc_info);
}
|
|
|
// Assigns a ROI sub-blob of the given blob to a layer of an infer request.
// On a GPU device with a remote context, a failure is logged with a hint
// about G-API preprocessing and then re-thrown.
static void setROIBlob(InferenceEngine::InferRequest& req,
                       const std::string&             layer_name,
                       const IE::Blob::Ptr&           blob,
                       const cv::Rect                 &roi,
                       const IECallContext&           ctx) {
    const bool is_remote_gpu =
        ctx.uu.params.device_id.find("GPU") != std::string::npos && ctx.uu.rctx;

    if (!is_remote_gpu) {
        setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx);
        return;
    }

    try {
        // NB: make_shared_blob() cannot work with GPU NV12 & ROI at the moment.
        // OpenVINO produces exception with unsupported status.
        // To avoid a silent crash we catch the OV exception and suggest
        // working around the problem with the inner preprocessing feature.
        // VPP/VPL preprocessing are supported at the moment
        setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx);
    } catch (const std::exception &ex) {
        GAPI_LOG_WARNING(nullptr, "cannot set ROI blob for layer: " << layer_name <<
                                  ", reason:\n" << ex.what() <<
                                  "\nTry using self GAPI preprocessing feature: "
                                  " Check method `cfgPreprocessingParams` in `cv::gapi::ie::Params`");
        throw;
    }
}
|
} // anonymous namespace |
|
|
|
std::vector<InferenceEngine::InferRequest> cv::gimpl::ie::IECompiled::createInferRequests() { |
|
std::vector<InferenceEngine::InferRequest> requests; |
|
requests.reserve(params.nireq); |
|
|
|
for (size_t i = 0; i < params.nireq; ++i) { |
|
requests.push_back(this_network.CreateInferRequest()); |
|
auto& request = requests.back(); |
|
// Bind const data to infer request |
|
for (auto &&p : params.const_inputs) { |
|
// FIXME: SetBlob is known to be inefficient, |
|
// it is worth to make a customizable "initializer" and pass the |
|
// cv::Mat-wrapped blob there to support IE's optimal "GetBlob idiom" |
|
// Still, constant data is to set only once. |
|
request.SetBlob(p.first, wrapIE(p.second.first, p.second.second)); |
|
} |
|
} |
|
|
|
return requests; |
|
} |
|
|
|
// Owns a fixed set of IE infer requests together with a queue of the indices
// of those requests which are currently idle.
class cv::gimpl::ie::RequestPool {
public:
    // Sets the input blobs on a request and starts asynchronous inference.
    using RunF      = std::function<void(InferenceEngine::InferRequest&)>;
    // Obtains the output blobs of a finished request and posts them to the output.
    using CallbackF = std::function<void(InferenceEngine::InferRequest&, InferenceEngine::StatusCode)>;

    // NB: A unit of work for the pool is a pair of functions:
    //     run      - invoked by execute() on an idle request;
    //     callback - invoked once that request has completed.
    struct Task {
        RunF run;
        CallbackF callback;
    };

    // Takes ownership of the requests; all of them are registered as idle.
    explicit RequestPool(std::vector<InferenceEngine::InferRequest>&& requests);

    // Takes an idle request and runs the task on it.
    void execute(Task&& t);
    // Returns once every request of the pool is idle again.
    void waitAll();

private:
    // Completion handler installed on a request for the duration of a task.
    void callback(Task task,
                  size_t id,
                  IE::InferRequest request,
                  IE::StatusCode code) noexcept;
    // Pushes the index of every request into the idle queue.
    void setup();

    QueueClass<size_t>                         m_idle_ids;  // indices of idle requests
    std::vector<InferenceEngine::InferRequest> m_requests;  // the requests themselves
};
|
|
|
// RequestPool implementation ////////////////////////////////////////////// |
|
// Adopts the given infer requests and marks each of them as idle.
cv::gimpl::ie::RequestPool::RequestPool(
        std::vector<InferenceEngine::InferRequest>&& requests)
    : m_requests(std::move(requests))
{
    // The idle queue starts empty; fill it with the index of every request.
    setup();
}
|
|
|
void cv::gimpl::ie::RequestPool::setup() { |
|
for (size_t i = 0; i < m_requests.size(); ++i) { |
|
m_idle_ids.push(i); |
|
} |
|
} |
|
|
|
// Takes an idle request from the pool, installs a completion handler that
// forwards to RequestPool::callback() with a copy of the task, and invokes
// `t.run` on the request.
//
// If `t.run` throws, the completion handler is replaced with a no-op, the
// request is returned to the idle queue and the exception is re-thrown.
void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t) {
    // Presumably blocks until an idle request is available (see waitAll()).
    size_t idle_id = 0u;
    m_idle_ids.pop(idle_id);
    auto& infer_request = m_requests[idle_id];

    // The handler keeps its own copy of the task: `t` is still used below.
    using callback_t = std::function<void(IE::InferRequest, IE::StatusCode)>;
    callback_t on_complete =
        [this, t, idle_id](IE::InferRequest finished_request, IE::StatusCode status) {
            callback(t, idle_id, finished_request, status);
        };
    infer_request.SetCompletionCallback(std::move(on_complete));

    // NB: The request is already marked as busy here, so in case of
    // an exception it has to be handed back to the idle queue.
    try {
        t.run(infer_request);
    } catch (...) {
        infer_request.SetCompletionCallback([](){});
        m_idle_ids.push(idle_id);
        throw;
    }
}
|
|
|
// Completion handler of a single infer request.
//   task    - the task which was started on this request;
//   id      - index of the request inside the pool;
//   request - the request which has just finished;
//   code    - completion status reported by the Inference Engine.
void cv::gimpl::ie::RequestPool::callback(cv::gimpl::ie::RequestPool::Task task,
                                          size_t id,
                                          IE::InferRequest request,
                                          IE::StatusCode code) noexcept {
    // NB: Inference is over, so:
    // (1) let the task handle the results,
    task.callback(request, code);
    // (2) drop the installed handler to free the resources it holds,
    request.SetCompletionCallback([](){});
    // (3) and mark the request as idle again.
    m_idle_ids.push(id);
}
|
|
|
// NB: Not thread-safe. |
|
void cv::gimpl::ie::RequestPool::waitAll() { |
|
// NB: It will be blocked if at least one request is busy. |
|
for (size_t i = 0; i < m_requests.size(); ++i) { |
|
size_t id = 0u; |
|
m_idle_ids.pop(id); |
|
} |
|
setup(); |
|
} |
|
|
|
// GIEExecutable implementation //////////////////////////////////////////////
|
// Builds the island executable from the given graph nodes.
//
// Exactly one operation node is accepted: its IEUnit is compiled and a request
// pool is created from the compiled network. Data nodes are recorded; constant
// and internal data storages are rejected. Any violation is reported with
// std::logic_error.
cv::gimpl::ie::GIEExecutable::GIEExecutable(const ade::Graph &g,
                                            const std::vector<ade::NodeHandle> &nodes)
    : m_g(g), m_gm(m_g) {
    // FIXME: Currently this backend is capable to run a single inference node only.
    // Need to extend our island fusion with merge/not-to-merge decision making parametrization
    GConstGIEModel iem(g);

    for (auto &nh : nodes) {
        const auto node_type = m_gm.metadata(nh).get<NodeType>().t;

        if (node_type == NodeType::OP) {
            if (this_nh != nullptr) {
                util::throw_error(std::logic_error("Multi-node inference is not supported!"));
            } else {
                // The first (and the only allowed) operation of the island.
                this_nh = nh;
                this_iec = iem.metadata(this_nh).get<IEUnit>().compile();
                m_reqPool.reset(new RequestPool(this_iec.createInferRequests()));
            }
        } else if (node_type == NodeType::DATA) {
            m_dataNodes.push_back(nh);
            const auto &data = m_gm.metadata(nh).get<Data>();
            if (data.storage == Data::Storage::CONST_VAL) {
                util::throw_error(std::logic_error("No const data please!"));
            }
            if (data.storage == Data::Storage::INTERNAL) {
                util::throw_error(std::logic_error("No internal data please!"));
            }
        } else {
            util::throw_error(std::logic_error("Unsupported NodeType type"));
        }
    }
}
|
|
|
// Processes a single message taken from the island input.
//
// General algorithm:
//     1. Take the next input message.
//     2. If it is EndOfStream, wait until all submitted tasks are done,
//        forward EndOfStream to the output and return.
//     3. Otherwise collect the island inputs/outputs.
//     4. Create the kernel context (every kernel call has its own context).
//     5. Run the kernel.
//        5.1 The kernel takes the necessary infer requests and starts
//            asynchronous execution.
//        5.2 After the kernel returns, processing can continue with the next frame.
//        5.3 If the kernel throws, the exception is posted to every output.
//     6. If the graph is compiled in non-streaming mode, wait until all tasks are done.
void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput  &in,
                                       cv::gimpl::GIslandExecutable::IOutput &out) {
    // (1) Take the next input message
    const auto &in_desc = in.desc();
          auto  in_msg  = in.get();

    if (cv::util::holds_alternative<cv::gimpl::EndOfStream>(in_msg))
    {
        // (2) Wait until all submitted tasks are done.
        m_reqPool->waitAll();
        out.post(cv::gimpl::EndOfStream{});
        return;
    }

    GAPI_Assert(cv::util::holds_alternative<cv::GRunArgs>(in_msg));
    // NB: `in_msg` outlives every use of the arguments below, so they are
    // referenced in place instead of copying the whole vector.
    const auto &in_vector = cv::util::get<cv::GRunArgs>(in_msg);

    // (3) Collect island inputs/outputs
    std::vector<InObj> input_objs;
    input_objs.reserve(in_desc.size());
    for (auto &&it: ade::util::zip(ade::util::toRange(in_desc),
                                   ade::util::toRange(in_vector)))
    {
        input_objs.emplace_back(std::get<0>(it), std::get<1>(it));
    }

    const auto &out_desc = out.desc();
    std::vector<OutObj> output_objs;
    output_objs.reserve(out_desc.size());
    for (auto &&it: ade::util::indexed(ade::util::toRange(out_desc)))
    {
        output_objs.emplace_back(ade::util::value(it),
                                 out.get(ade::util::checked_cast<int>(ade::util::index(it))));
    }

    GConstGIEModel giem(m_g);
    const auto &uu = giem.metadata(this_nh).get<IEUnit>();
    const auto &op = m_gm.metadata(this_nh).get<Op>();
    // (4) Create kernel context
    auto ctx = std::make_shared<IECallContext>(uu, out, op.args, op.outs,
            std::move(input_objs), std::move(output_objs));

    const auto &kk = giem.metadata(this_nh).get<IECallable>();

    // (5) Run the kernel.
    try {
        kk.run(ctx, *m_reqPool);
    } catch (...) {
        // (5.3) Report the failure through every output of the operation.
        auto eptr = std::current_exception();
        for (auto i : ade::util::iota(ctx->uu.params.num_out))
        {
            auto output = ctx->output(i);
            ctx->out.post(std::move(output), eptr);
        }
        return;
    }

    // (6) In non-streaming mode need to wait until all the tasks are done
    // FIXME: Is there more graceful way to handle this case ?
    if (!m_gm.metadata().contains<Streaming>()) {
        m_reqPool->waitAll();
    }
}
|
|
|
namespace cv { |
|
namespace gimpl { |
|
namespace ie { |
|
// Adds to `input_reshape_table` an entry for the input layer described by `ii`
// whose two last dimensions are replaced with the height and the width of
// the image described by `mm` (a GMatDesc or a GFrameDesc).
//
// The layer must not be present in the table yet, and the layer must have
// at least two dimensions.
static void configureInputReshapeByImage(const IE::InputInfo::Ptr& ii,
                                         const cv::GMetaArg mm,
                                         IE::ICNNNetwork::InputShapes& input_reshape_table) {
    const auto& layer_name = ii->name();

    // A layer which is already in the table has been configured by explicit
    // shapes; the two ways of configuring a reshape must not intersect.
    if (input_reshape_table.count(layer_name) != 0u) {
        GAPI_Assert(false &&
                    "Names of layers for reshape with specified dimensions shouldn't intersect with names for reshape by image");
    }

    // Extract the image size from the supported kinds of metadata.
    cv::Size image_sz;
    if (cv::util::holds_alternative<cv::GMatDesc>(mm)) {
        image_sz = util::get<cv::GMatDesc>(mm).size;
    } else if (cv::util::holds_alternative<cv::GFrameDesc>(mm)) {
        image_sz = util::get<cv::GFrameDesc>(mm).size;
    } else {
        util::throw_error(std::runtime_error("Unsupported input meta for IE backend"));
    }

    auto new_dims = ii->getTensorDesc().getDims();
    const auto rank = new_dims.size();
    if (rank <= 1) {
        GAPI_Assert(false && "Unsupported number of dimensions for reshape by image");
    }
    new_dims.at(rank - 2) = static_cast<size_t>(image_sz.height);
    new_dims.at(rank - 1) = static_cast<size_t>(image_sz.width);

    // Register the new shape for this layer.
    input_reshape_table.emplace(layer_name, new_dims);
}
|
|
|
// Configures the precision (and, for NV12 frames, the color format) of the
// network input `ii` according to the G-API metadata `mm`:
//   - GMatDesc:   precision follows the matrix depth;
//   - GFrameDesc: precision is 8-bit unsigned; NV12, BGR and GRAY formats are accepted.
// Any other kind of metadata is reported with std::runtime_error.
static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg mm) {
    if (cv::util::holds_alternative<cv::GMatDesc>(mm)) {
        ii->setPrecision(toIE(util::get<cv::GMatDesc>(mm).depth));
        return;
    }

    if (!cv::util::holds_alternative<cv::GFrameDesc>(mm)) {
        util::throw_error(std::runtime_error("Unsupported input meta for IE backend"));
    }

    const auto &frame_meta = util::get<cv::GFrameDesc>(mm);
    switch (frame_meta.fmt) {
        case cv::MediaFormat::NV12:
            ii->getPreProcess().setColorFormat(IE::ColorFormat::NV12);
            break;
        case cv::MediaFormat::BGR:
        case cv::MediaFormat::GRAY:
            // NB: Nothing to configure for these formats
            break;
        default:
            GAPI_Assert(false && "Unsupported media format for IE backend");
    }
    ii->setPrecision(toIE(CV_8U));
}
|
|
|
// Tells whether the tensor description has the NCHW or NHWC layout
// together with the FP32 or U8 precision.
static bool isApplicableForResize(const IE::TensorDesc& desc) {
    const auto layout = desc.getLayout();
    const auto prec   = desc.getPrecision();

    const bool has_image_layout =
        layout == IE::Layout::NCHW || layout == IE::Layout::NHWC;
    const bool has_supported_precision =
        prec == IE::Precision::FP32 || prec == IE::Precision::U8;

    return has_image_layout && has_supported_precision;
}
|
|
|
// Builds the preprocessing description for the network input `ii`:
//   - the NV12 color format is requested if `mm` describes an NV12 frame;
//   - bilinear resize is requested if the input tensor is applicable for resize.
static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii,
                                               const cv::GMetaArg&        mm) {
    IE::PreProcessInfo preproc;

    const bool is_nv12_frame =
        cv::util::holds_alternative<cv::GFrameDesc>(mm) &&
        cv::util::get<cv::GFrameDesc>(mm).fmt == cv::MediaFormat::NV12;
    if (is_nv12_frame) {
        preproc.setColorFormat(IE::ColorFormat::NV12);
    }

    const bool resizable = isApplicableForResize(ii->getTensorDesc());
    if (resizable) {
        preproc.setResizeAlgorithm(IE::RESIZE_BILINEAR);
    }

    return preproc;
}
|
|
|
using namespace cv::gapi::ie::detail; |
|
static void configureOutputPrecision(const IE::OutputsDataMap &outputs_info, |
|
const ParamDesc::precision_variant_t &output_precision) { |
|
switch (output_precision.index()) { |
|
case ParamDesc::precision_variant_t::index_of<ParamDesc::precision_t>(): { |
|
auto precision = toIE(cv::util::get<ParamDesc::precision_t>(output_precision)); |
|
for (auto it : outputs_info) { |
|
it.second->setPrecision(precision); |
|
} |
|
break; |
|
} |
|
case ParamDesc::precision_variant_t::index_of<ParamDesc::precision_map_t>(): { |
|
const auto& precision_map = |
|
cv::util::get<ParamDesc::precision_map_t>(output_precision); |
|
for (auto it : precision_map) { |
|
outputs_info.at(it.first)->setPrecision(toIE(it.second)); |
|
} |
|
break; |
|
} |
|
case ParamDesc::precision_variant_t::index_of<cv::util::monostate>(): { |
|
// Do nothing; |
|
break; |
|
} |
|
} |
|
} |
|
|
|
// NB: This is a callback used by async infer |
|
// to post outputs blobs (cv::GMat's). |
|
static void PostOutputs(InferenceEngine::InferRequest &request, |
|
InferenceEngine::StatusCode code, |
|
std::shared_ptr<IECallContext> ctx) { |
|
GAPI_ITT_STATIC_LOCAL_HANDLE(ie_cb_post_outputs_hndl, "IE_async_callback_PostOutputs"); |
|
GAPI_ITT_AUTO_TRACE_GUARD(ie_cb_post_outputs_hndl); |
|
|
|
if (code != IE::StatusCode::OK) { |
|
std::stringstream ss; |
|
ss << "InferRequest for model: " << ctx->uu.params.model_path |
|
<< " finished with InferenceEngine::StatusCode: " << static_cast<int>(code); |
|
ctx->eptr = std::make_exception_ptr(std::logic_error(ss.str())); |
|
} |
|
|
|
for (auto i : ade::util::iota(ctx->uu.params.num_out)) { |
|
auto& out_mat = ctx->outMatR(i); |
|
IE::Blob::Ptr this_blob = request.GetBlob(ctx->uu.params.output_names[i]); |
|
copyFromIE(this_blob, out_mat); |
|
auto output = ctx->output(i); |
|
ctx->out.meta(output, ctx->input(0).meta); |
|
ctx->out.post(std::move(output), ctx->eptr); |
|
} |
|
|
|
ctx->views.clear(); |
|
ctx->releaseKeepAliveFrame(&request); |
|
} |
|
|
|
class PostOutputsList { |
|
public: |
|
PostOutputsList(size_t size, |
|
std::shared_ptr<IECallContext> ctx, |
|
std::vector<std::vector<int>>&& cached_dims); |
|
|
|
void operator()(InferenceEngine::InferRequest &request, |
|
InferenceEngine::StatusCode code, |
|
size_t pos) const; |
|
|
|
private: |
|
struct Priv { |
|
size_t size; |
|
std::atomic<size_t> finished{0u}; |
|
std::shared_ptr<IECallContext> ctx; |
|
std::vector<std::vector<int>> cached_dims; |
|
}; |
|
std::shared_ptr<Priv> m_priv; |
|
}; |
|
|
|
// Creates the shared state and fills it with the number of elements,
// the call context and the output dimensions.
PostOutputsList::PostOutputsList(size_t size,
                                 std::shared_ptr<IECallContext> ctx,
                                 std::vector<std::vector<int>>&& cached_dims)
    : m_priv(std::make_shared<Priv>()) {
    Priv &state       = *m_priv;
    state.size        = size;
    state.ctx         = ctx;
    state.cached_dims = std::move(cached_dims);
}
|
|
|
// Handles the completion of the infer request started for the list element `pos`.
//
//   req  - the finished infer request;
//   code - completion status of the request;
//   pos  - index of the element inside the output vectors.
//
// A non-OK status is recorded in the context as an exception. While no error
// is recorded, the output blobs are copied to the `pos`-th element of every
// output vector. The call which handles the last of the `size` elements posts
// all the outputs (together with the recorded exception, if any).
void PostOutputsList::operator()(InferenceEngine::InferRequest &req,
                                 InferenceEngine::StatusCode    code,
                                 size_t                         pos) const {
    auto&& ctx         = m_priv->ctx;
    auto&& cached_dims = m_priv->cached_dims;
    auto&& finished    = m_priv->finished;
    auto&& size        = m_priv->size;

    if (code != IE::StatusCode::OK) {
        ctx->eptr = std::make_exception_ptr(
                std::logic_error("IE::InferRequest finished with not OK status"));
    }

    if (!ctx->eptr) {
        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
            std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);

            IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]);
            GAPI_Assert(out_blob);

            // FIXME: Avoid data copy. Not sure if it is possible though
            out_vec[pos].create(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
            copyFromIE(out_blob, out_vec[pos]);
        }
    }

    // NB: The increment and the comparison must be based on a single atomic
    // operation. Re-reading the counter after the increment would allow
    // several concurrently finishing requests to observe the final value,
    // and the outputs would be posted more than once.
    const size_t num_finished = ++finished;

    if (num_finished == size) {
        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
            auto output = ctx->output(i);
            ctx->out.meta(output, ctx->input(0).meta);
            ctx->out.post(std::move(output), ctx->eptr);
        }
    }
}
|
|
|
struct Infer: public cv::detail::KernelTag { |
|
using API = cv::GInferBase; |
|
static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); } |
|
static KImpl kernel() { return KImpl{outMeta, run}; } |
|
|
|
static cv::GMetaArgs outMeta(const ade::Graph &gr, |
|
const ade::NodeHandle &nh, |
|
const cv::GMetaArgs &in_metas, |
|
const cv::GArgs &/*in_args*/) { |
|
// Specify network's output layer metadata to the framework |
|
// Also specify the input information to the IE from the framework |
|
// NB: Have no clue if network's input [dimensions] may ever define |
|
// its output dimensions. It seems possible with OpenCV DNN APIs |
|
|
|
cv::GMetaArgs result; |
|
|
|
GConstGIEModel gm(gr); |
|
const auto &uu = gm.metadata(nh).get<IEUnit>(); |
|
IE::ICNNNetwork::InputShapes input_reshape_table = uu.params.reshape_table; |
|
|
|
// Initialize input information |
|
// Note our input layers list order matches the API order and so |
|
// meta order. |
|
GAPI_Assert(uu.params.input_names.size() == in_metas.size() |
|
&& "Known input layers count doesn't match input meta count"); |
|
|
|
// NB: Configuring input/output precision and network reshape must be done |
|
// only in the loadNetwork case. |
|
using namespace cv::gapi::ie::detail; |
|
if (uu.params.kind == ParamDesc::Kind::Load) { |
|
auto inputs = uu.net.getInputsInfo(); |
|
for (auto &&it : ade::util::zip(ade::util::toRange(uu.params.input_names), |
|
ade::util::toRange(in_metas))) { |
|
const auto &input_name = std::get<0>(it); |
|
auto ii = inputs.at(input_name); |
|
const auto & mm = std::get<1>(it); |
|
|
|
configureInputInfo(ii, mm); |
|
if (uu.params.layer_names_to_reshape.find(input_name) != |
|
uu.params.layer_names_to_reshape.end()) { |
|
configureInputReshapeByImage(ii, mm, input_reshape_table); |
|
} |
|
|
|
if (isApplicableForResize(ii->getTensorDesc())) { |
|
ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); |
|
} |
|
|
|
// NB: configure input param for further preproc |
|
if (uu.net_input_params.is_applicable(mm)) { |
|
const_cast<IEUnit::InputFramesDesc &>(uu.net_input_params) |
|
.set_param(input_name, ii->getTensorDesc()); |
|
} |
|
} |
|
|
|
// FIXME: This isn't the best place to call reshape function. |
|
// Сorrect solution would be to do this in compile() method of network, |
|
// but now input meta isn't passed to compile() method. |
|
if (!input_reshape_table.empty()) { |
|
const_cast<IE::CNNNetwork *>(&uu.net)->reshape(input_reshape_table); |
|
} |
|
configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); |
|
} else { |
|
GAPI_Assert(uu.params.kind == ParamDesc::Kind::Import); |
|
auto inputs = uu.this_network.GetInputsInfo(); |
|
// FIXME: This isn't the best place to collect PreProcMap. |
|
auto* non_const_prepm = const_cast<IEUnit::PreProcMap*>(&uu.preproc_map); |
|
for (auto &&it : ade::util::zip(ade::util::toRange(uu.params.input_names), |
|
ade::util::toRange(in_metas))) { |
|
const auto &input_name = std::get<0>(it); |
|
auto ii = inputs.at(input_name); |
|
const auto & mm = std::get<1>(it); |
|
non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); |
|
|
|
// NB: configure input param for further preproc |
|
if (uu.net_input_params.is_applicable(mm)) { |
|
const_cast<IEUnit::InputFramesDesc &>(uu.net_input_params) |
|
.set_param(input_name, ii->getTensorDesc()); |
|
} |
|
} |
|
} |
|
|
|
// FIXME: It would be nice here to have an exact number of network's |
|
// input/output parameters. Probably GCall should store it here for us. |
|
// It doesn't, as far as I know.. |
|
for (const auto &out_name : uu.params.output_names) { |
|
// NOTE: our output_names vector follows the API order |
|
// of this operation's outputs |
|
const auto& desc = |
|
uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load |
|
? uu.net.getOutputsInfo().at(out_name)->getTensorDesc() |
|
: uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); |
|
|
|
cv::GMatDesc outm(toCV(desc.getPrecision()), |
|
toCV(desc.getDims())); |
|
result.emplace_back(outm); |
|
} |
|
return result; |
|
} |
|
|
|
static void run(std::shared_ptr<IECallContext> ctx, |
|
cv::gimpl::ie::RequestPool &reqPool) { |
|
using namespace std::placeholders; |
|
reqPool.execute( |
|
cv::gimpl::ie::RequestPool::Task { |
|
[ctx](InferenceEngine::InferRequest &req) { |
|
// non-generic version for now: |
|
// - assumes all inputs/outputs are always Mats |
|
for (auto i : ade::util::iota(ctx->uu.params.num_in)) { |
|
const auto& layer_name = ctx->uu.params.input_names[i]; |
|
auto layout = |
|
ctx->uu.this_network.GetInputsInfo(). |
|
at(layer_name)->getTensorDesc().getLayout(); |
|
auto hint = |
|
(layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) |
|
? cv::gapi::ie::TraitAs::IMAGE : cv::gapi::ie::TraitAs::TENSOR; |
|
|
|
IE::Blob::Ptr this_blob = extractBlob(*ctx, i, hint, |
|
layer_name, |
|
cv::util::optional<cv::Rect>{}); |
|
setBlob(req, layer_name, this_blob, *ctx); |
|
} |
|
// FIXME: Should it be done by kernel ? |
|
// What about to do that in RequestPool ? |
|
req.StartAsync(); |
|
}, |
|
std::bind(PostOutputs, _1, _2, ctx) |
|
} |
|
); |
|
} |
|
}; |
|
|
|
struct InferROI: public cv::detail::KernelTag { |
|
using API = cv::GInferROIBase; |
|
static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); } |
|
static KImpl kernel() { return KImpl{outMeta, run}; } |
|
|
|
static cv::GMetaArgs outMeta(const ade::Graph &gr, |
|
const ade::NodeHandle &nh, |
|
const cv::GMetaArgs &in_metas, |
|
const cv::GArgs &/*in_args*/) { |
|
cv::GMetaArgs result; |
|
|
|
GConstGIEModel gm(gr); |
|
const auto &uu = gm.metadata(nh).get<IEUnit>(); |
|
IE::ICNNNetwork::InputShapes input_reshape_table = uu.params.reshape_table; |
|
|
|
// Initialize input information |
|
// FIXME: So far it is pretty limited |
|
GAPI_Assert(1u == uu.params.input_names.size()); |
|
GAPI_Assert(2u == in_metas.size()); |
|
|
|
const auto &input_name = uu.params.input_names.at(0); |
|
auto &&mm = in_metas.at(1u); |
|
// NB: Configuring input precision and network reshape must be done |
|
// only in the loadNetwork case. |
|
if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
// 0th is ROI, 1st is input image |
|
auto ii = uu.net.getInputsInfo().at(input_name); |
|
configureInputInfo(ii, mm); |
|
if (uu.params.layer_names_to_reshape.find(input_name) != |
|
uu.params.layer_names_to_reshape.end()) { |
|
configureInputReshapeByImage(ii, mm, input_reshape_table); |
|
} |
|
if (isApplicableForResize(ii->getTensorDesc())) { |
|
ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); |
|
} |
|
|
|
// FIXME: This isn't the best place to call reshape function. |
|
// Сorrect solution would be to do this in compile() method of network, |
|
// but now input meta isn't passed to compile() method. |
|
if (!input_reshape_table.empty()) { |
|
const_cast<IE::CNNNetwork *>(&uu.net)->reshape(input_reshape_table); |
|
} |
|
|
|
// NB: configure input param for further preproc |
|
if (uu.net_input_params.is_applicable(mm)) { |
|
const_cast<IEUnit::InputFramesDesc &>(uu.net_input_params) |
|
.set_param(input_name, ii->getTensorDesc()); |
|
} |
|
configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); |
|
} else { |
|
GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); |
|
auto inputs = uu.this_network.GetInputsInfo(); |
|
// FIXME: This isn't the best place to collect PreProcMap. |
|
auto* non_const_prepm = const_cast<IEUnit::PreProcMap*>(&uu.preproc_map); |
|
auto ii = inputs.at(input_name); |
|
non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); |
|
|
|
// NB: configure intput param for further preproc |
|
if (uu.net_input_params.is_applicable(mm)) { |
|
const_cast<IEUnit::InputFramesDesc &>(uu.net_input_params) |
|
.set_param(input_name, ii->getTensorDesc()); |
|
} |
|
} |
|
|
|
// FIXME: It would be nice here to have an exact number of network's |
|
// input/output parameters. Probably GCall should store it here for us. |
|
// It doesn't, as far as I know.. |
|
for (const auto &out_name : uu.params.output_names) { |
|
// NOTE: our output_names vector follows the API order |
|
// of this operation's outputs |
|
const auto& desc = |
|
uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load |
|
? uu.net.getOutputsInfo().at(out_name)->getTensorDesc() |
|
: uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); |
|
|
|
cv::GMatDesc outm(toCV(desc.getPrecision()), |
|
toCV(desc.getDims())); |
|
result.emplace_back(outm); |
|
} |
|
return result; |
|
} |
|
|
|
static void run(std::shared_ptr<IECallContext> ctx, |
|
cv::gimpl::ie::RequestPool &reqPool) { |
|
using namespace std::placeholders; |
|
reqPool.execute( |
|
cv::gimpl::ie::RequestPool::Task { |
|
[ctx](InferenceEngine::InferRequest &req) { |
|
GAPI_Assert(ctx->uu.params.num_in == 1); |
|
auto&& this_roi = ctx->inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>(); |
|
|
|
// reserve unique slot for keep alive preprocessed frame |
|
cv::MediaFrame* slot_ptr = ctx->prepareKeepAliveFrameSlot(&req); |
|
|
|
// NB: This blob will be used to make roi from its, so |
|
// it should be treated as image |
|
bool preprocessed = false; |
|
IE::Blob::Ptr this_blob = |
|
extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, |
|
*(ctx->uu.params.input_names.begin()), |
|
cv::util::make_optional(this_roi), |
|
slot_ptr, &preprocessed); |
|
if (!preprocessed) { |
|
setROIBlob(req, |
|
*(ctx->uu.params.input_names.begin()), |
|
this_blob, this_roi, *ctx); |
|
} else { |
|
setBlob(req, |
|
*(ctx->uu.params.input_names.begin()), |
|
this_blob, *ctx); |
|
} |
|
// FIXME: Should it be done by kernel ? |
|
// What about to do that in RequestPool ? |
|
req.StartAsync(); |
|
}, |
|
std::bind(PostOutputs, _1, _2, ctx) |
|
} |
|
); |
|
} |
|
}; |
|
|
|
|
|
struct InferList: public cv::detail::KernelTag { |
|
using API = cv::GInferListBase; |
|
static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); } |
|
static KImpl kernel() { return KImpl{outMeta, run}; } |
|
|
|
static cv::GMetaArgs outMeta(const ade::Graph &gr, |
|
const ade::NodeHandle &nh, |
|
const cv::GMetaArgs &in_metas, |
|
const cv::GArgs &/*in_args*/) { |
|
// Specify the input information to the IE from the framework |
|
// NB: Have no clue if network's input [dimensions] may ever define |
|
// its output dimensions. It seems possible with OpenCV DNN APIs |
|
|
|
GConstGIEModel gm(gr); |
|
const auto &uu = gm.metadata(nh).get<IEUnit>(); |
|
IE::ICNNNetwork::InputShapes input_reshape_table = uu.params.reshape_table; |
|
|
|
// Initialize input information |
|
// Note our input layers list order matches the API order and so |
|
// meta order. |
|
GAPI_Assert(uu.params.input_names.size() == (in_metas.size() - 1u) |
|
&& "Known input layers count doesn't match input meta count"); |
|
|
|
// NB: Configuring input precision and network reshape must be done |
|
// only in the loadNetwork case. |
|
if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
std::size_t idx = 1u; |
|
auto inputs = uu.net.getInputsInfo(); |
|
for (auto &&input_name : uu.params.input_names) { |
|
auto ii = inputs.at(input_name); |
|
const auto & mm = in_metas[idx++]; |
|
configureInputInfo(ii, mm); |
|
if (uu.params.layer_names_to_reshape.find(input_name) != |
|
uu.params.layer_names_to_reshape.end()) { |
|
configureInputReshapeByImage(ii, mm, input_reshape_table); |
|
} |
|
if (isApplicableForResize(ii->getTensorDesc())) { |
|
ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); |
|
} |
|
} |
|
|
|
// FIXME: This isn't the best place to call reshape function. |
|
// Сorrect solution would be to do this in compile() method of network, |
|
// but now input meta isn't passed to compile() method. |
|
if (!input_reshape_table.empty()) { |
|
const_cast<IE::CNNNetwork *>(&uu.net)->reshape(input_reshape_table); |
|
} |
|
configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); |
|
} else { |
|
GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); |
|
std::size_t idx = 1u; |
|
auto inputs = uu.this_network.GetInputsInfo(); |
|
auto* non_const_prepm = const_cast<IEUnit::PreProcMap*>(&uu.preproc_map); |
|
for (auto &&input_name : uu.params.input_names) { |
|
auto ii = inputs.at(input_name); |
|
const auto & mm = in_metas[idx++]; |
|
non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); |
|
} |
|
} |
|
|
|
// roi-list version is much easier at the moment. |
|
// All our outputs are vectors which don't have |
|
// metadata at the moment - so just create a vector of |
|
// "empty" array metadatas of the required size. |
|
return cv::GMetaArgs(uu.params.output_names.size(), |
|
cv::GMetaArg{cv::empty_array_desc()}); |
|
} |
|
|
|
static void run(std::shared_ptr<IECallContext> ctx, |
|
cv::gimpl::ie::RequestPool &reqPool) { |
|
const auto& in_roi_vec = ctx->inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>(); |
|
// NB: In case there is no input data need to post output anyway |
|
if (in_roi_vec.empty()) { |
|
for (auto i : ade::util::iota(ctx->uu.params.num_out)) { |
|
auto output = ctx->output(i); |
|
ctx->out.meta(output, ctx->input(0).meta); |
|
ctx->out.post(std::move(output)); |
|
} |
|
return; |
|
} |
|
|
|
// NB: This blob will be used to make roi from its, so |
|
// it should be treated as image |
|
IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, |
|
ctx->uu.params.input_names[0u], |
|
cv::util::optional<cv::Rect>{}); |
|
|
|
std::vector<std::vector<int>> cached_dims(ctx->uu.params.num_out); |
|
for (auto i : ade::util::iota(ctx->uu.params.num_out)) { |
|
const auto& out_name = ctx->uu.params.output_names[i]; |
|
const auto& desc = |
|
ctx->uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load |
|
? ctx->uu.net.getOutputsInfo().at(out_name)->getTensorDesc() |
|
: ctx->uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); |
|
cached_dims[i] = toCV(desc.getDims()); |
|
// FIXME: Isn't this should be done automatically |
|
// by some resetInternalData(), etc? (Probably at the GExecutor level) |
|
auto& out_vec = ctx->outVecR<cv::Mat>(i); |
|
out_vec.clear(); |
|
out_vec.resize(in_roi_vec.size()); |
|
} |
|
|
|
PostOutputsList callback(in_roi_vec.size(), ctx, std::move(cached_dims)); |
|
for (auto&& it : ade::util::indexed(in_roi_vec)) { |
|
auto pos = ade::util::index(it); |
|
const auto& rc = ade::util::value(it); |
|
reqPool.execute( |
|
cv::gimpl::ie::RequestPool::Task { |
|
[ctx, rc, this_blob](InferenceEngine::InferRequest &req) { |
|
setROIBlob(req, ctx->uu.params.input_names[0u], this_blob, rc, *ctx); |
|
req.StartAsync(); |
|
}, |
|
std::bind(callback, std::placeholders::_1, std::placeholders::_2, pos) |
|
} |
|
); |
|
} |
|
} |
|
}; |
|
|
|
struct InferList2: public cv::detail::KernelTag { |
|
using API = cv::GInferList2Base; |
|
static cv::gapi::GBackend backend() { return cv::gapi::ie::backend(); } |
|
static KImpl kernel() { return KImpl{outMeta, run}; } |
|
|
|
static cv::GMetaArgs outMeta(const ade::Graph &gr, |
|
const ade::NodeHandle &nh, |
|
const cv::GMetaArgs &in_metas, |
|
const cv::GArgs &/*in_args*/) { |
|
// Specify the input information to the IE from the framework |
|
// NB: Have no clue if network's input [dimensions] may ever define |
|
// its output dimensions. It seems possible with OpenCV DNN APIs |
|
|
|
GConstGIEModel gm(gr); |
|
const auto &uu = gm.metadata(nh).get<IEUnit>(); |
|
IE::ICNNNetwork::InputShapes input_reshape_table = uu.params.reshape_table; |
|
|
|
// Initialize input information |
|
// Note our input layers list order matches the API order and so |
|
// meta order. |
|
GAPI_Assert(uu.params.input_names.size() == (in_metas.size() - 1u) |
|
&& "Known input layers count doesn't match input meta count"); |
|
|
|
const auto &op = gm.metadata(nh).get<Op>(); |
|
|
|
// In contrast to InferList, the InferList2 has only one |
|
// "full-frame" image argument, and all the rest are arrays of |
|
// ether ROI or blobs. So here we set the 0th arg image format |
|
// to all inputs which are ROI-based (skipping the |
|
// "blob"-based ones) |
|
// FIXME: this is filtering not done, actually! GArrayDesc has |
|
// no hint for its underlying type! |
|
const auto &mm_0 = in_metas[0u]; |
|
switch (in_metas[0u].index()) { |
|
case cv::GMetaArg::index_of<cv::GMatDesc>(): { |
|
const auto &meta_0 = util::get<cv::GMatDesc>(mm_0); |
|
GAPI_Assert( !meta_0.isND() |
|
&& !meta_0.planar |
|
&& "Only images are supported as the 0th argument"); |
|
break; |
|
} |
|
case cv::GMetaArg::index_of<cv::GFrameDesc>(): { |
|
// FIXME: Is there any validation for GFrame ? |
|
break; |
|
} |
|
default: |
|
util::throw_error(std::runtime_error("Unsupported input meta for IE backend")); |
|
} |
|
|
|
if (util::holds_alternative<cv::GMatDesc>(mm_0)) { |
|
const auto &meta_0 = util::get<cv::GMatDesc>(mm_0); |
|
GAPI_Assert( !meta_0.isND() |
|
&& !meta_0.planar |
|
&& "Only images are supported as the 0th argument"); |
|
} |
|
|
|
std::size_t idx = 1u; |
|
for (auto &&input_name : uu.params.input_names) { |
|
const auto &mm = in_metas[idx]; |
|
GAPI_Assert(util::holds_alternative<cv::GArrayDesc>(mm) |
|
&& "Non-array inputs are not supported"); |
|
|
|
if (op.k.inKinds[idx] == cv::detail::OpaqueKind::CV_RECT) { |
|
// NB: Configuring input precision and network reshape must be done |
|
// only in the loadNetwork case. |
|
if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { |
|
// This is a cv::Rect -- configure the IE preprocessing |
|
auto ii = uu.net.getInputsInfo().at(input_name); |
|
configureInputInfo(ii, mm_0); |
|
if (uu.params.layer_names_to_reshape.find(input_name) != |
|
uu.params.layer_names_to_reshape.end()) { |
|
configureInputReshapeByImage(ii, mm_0, input_reshape_table); |
|
} |
|
if (isApplicableForResize(ii->getTensorDesc())) { |
|
ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); |
|
} |
|
|
|
// FIXME: This isn't the best place to call reshape function. |
|
// Correct solution would be to do this in compile() method of network, |
|
// but now input meta isn't passed to compile() method. |
|
if (!input_reshape_table.empty()) { |
|
const_cast<IE::CNNNetwork *>(&uu.net)->reshape(input_reshape_table); |
|
} |
|
configureOutputPrecision(uu.net.getOutputsInfo(), uu.params.output_precision); |
|
} else { |
|
GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); |
|
auto inputs = uu.this_network.GetInputsInfo(); |
|
auto* non_const_prepm = const_cast<IEUnit::PreProcMap*>(&uu.preproc_map); |
|
auto ii = inputs.at(input_name); |
|
non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm_0)); |
|
} |
|
} else { |
|
// This is a cv::GMat (equals to: cv::Mat) |
|
// Just validate that it is really the type |
|
// (other types are prohibited here) |
|
GAPI_Assert(op.k.inKinds[idx] == cv::detail::OpaqueKind::CV_MAT); |
|
} |
|
idx++; // NB: Never forget to increment the counter |
|
} |
|
|
|
// roi-list version is much easier at the moment. |
|
// All our outputs are vectors which don't have |
|
// metadata at the moment - so just create a vector of |
|
// "empty" array metadatas of the required size. |
|
return cv::GMetaArgs(uu.params.output_names.size(), |
|
cv::GMetaArg{cv::empty_array_desc()}); |
|
} |
|
|
|
static void run(std::shared_ptr<IECallContext> ctx, |
|
cv::gimpl::ie::RequestPool &reqPool) { |
|
GAPI_Assert(ctx->inArgs().size() > 1u |
|
&& "This operation must have at least two arguments"); |
|
// NB: This blob will be used to make roi from its, so |
|
// it should be treated as image |
|
IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0, cv::gapi::ie::TraitAs::IMAGE, |
|
ctx->uu.params.input_names[0u], |
|
cv::util::optional<cv::Rect>{}); |
|
const auto list_size = ctx->inArg<cv::detail::VectorRef>(1u).size(); |
|
if (list_size == 0u) { |
|
for (auto i : ade::util::iota(ctx->uu.params.num_out)) { |
|
auto output = ctx->output(i); |
|
ctx->out.meta(output, ctx->input(0).meta); |
|
ctx->out.post(std::move(output)); |
|
} |
|
return; |
|
} |
|
// FIXME: This could be done ONCE at graph compile stage! |
|
std::vector< std::vector<int> > cached_dims(ctx->uu.params.num_out); |
|
for (auto i : ade::util::iota(ctx->uu.params.num_out)) { |
|
const auto& out_name = ctx->uu.params.output_names[i]; |
|
const auto& desc = |
|
ctx->uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load |
|
? ctx->uu.net.getOutputsInfo().at(out_name)->getTensorDesc() |
|
: ctx->uu.this_network.GetOutputsInfo().at(out_name)->getTensorDesc(); |
|
cached_dims[i] = toCV(desc.getDims()); |
|
// FIXME: Isn't this should be done automatically |
|
// by some resetInternalData(), etc? (Probably at the GExecutor level) |
|
auto& out_vec = ctx->outVecR<cv::Mat>(i); |
|
out_vec.clear(); |
|
out_vec.resize(list_size); |
|
} |
|
|
|
PostOutputsList callback(list_size, ctx, std::move(cached_dims)); |
|
for (const auto &list_idx : ade::util::iota(list_size)) { |
|
reqPool.execute( |
|
cv::gimpl::ie::RequestPool::Task { |
|
[ctx, list_idx, list_size, blob_0](InferenceEngine::InferRequest &req) { |
|
for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) { |
|
const auto &this_vec = ctx->inArg<cv::detail::VectorRef>(in_idx+1u); |
|
GAPI_Assert(this_vec.size() == list_size); |
|
if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) { |
|
const auto &vec = this_vec.rref<cv::Rect>(); |
|
setROIBlob(req, ctx->uu.params.input_names[in_idx], |
|
blob_0, vec[list_idx], *ctx); |
|
} else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) { |
|
const auto &vec = this_vec.rref<cv::Mat>(); |
|
const auto &mat = vec[list_idx]; |
|
setBlob(req, ctx->uu.params.input_names[in_idx], |
|
wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR), |
|
*ctx); |
|
} else { |
|
GAPI_Assert(false && |
|
"Only Rect and Mat types are supported for infer list 2!"); |
|
} |
|
} |
|
req.StartAsync(); |
|
}, |
|
std::bind(callback, std::placeholders::_1, std::placeholders::_2, list_idx) |
|
} // task |
|
); |
|
} // for |
|
} |
|
}; |
|
|
|
} // namespace ie |
|
} // namespace gapi |
|
} // namespace cv |
|
|
|
|
|
// IE backend implementation of GBackend::Priv /////////////////////// |
|
namespace { |
|
class GIEBackendImpl final: public cv::gapi::GBackend::Priv { |
|
virtual void unpackKernel(ade::Graph &gr, |
|
const ade::NodeHandle &nh, |
|
const cv::GKernelImpl &ii) override { |
|
using namespace cv::gimpl; |
|
// FIXME: Introduce a DNNBackend interface which'd specify |
|
// the framework for this??? |
|
GIEModel gm(gr); |
|
auto &np = gm.metadata(nh).get<NetworkParams>(); |
|
auto &pp = cv::util::any_cast<cv::gapi::ie::detail::ParamDesc>(np.opaque); |
|
const auto &ki = cv::util::any_cast<KImpl>(ii.opaque); |
|
|
|
GModel::Graph model(gr); |
|
auto& op = model.metadata(nh).get<Op>(); |
|
|
|
// NB: In case generic infer, info about in/out names is stored in operation (op.params) |
|
if (pp.is_generic) |
|
{ |
|
auto& info = cv::util::any_cast<cv::detail::InOutInfo>(op.params); |
|
pp.input_names = info.in_names; |
|
pp.output_names = info.out_names; |
|
pp.num_in = info.in_names.size(); |
|
pp.num_out = info.out_names.size(); |
|
} |
|
|
|
gm.metadata(nh).set(IEUnit{pp}); |
|
gm.metadata(nh).set(IECallable{ki.run}); |
|
gm.metadata(nh).set(CustomMetaFunction{ki.customMetaFunc}); |
|
} |
|
|
|
virtual EPtr compile(const ade::Graph &graph, |
|
const cv::GCompileArgs &, |
|
const std::vector<ade::NodeHandle> &nodes) const override { |
|
return EPtr{new cv::gimpl::ie::GIEExecutable(graph, nodes)}; |
|
} |
|
|
|
virtual cv::GKernelPackage auxiliaryKernels() const override { |
|
return cv::gapi::kernels< cv::gimpl::ie::Infer |
|
, cv::gimpl::ie::InferROI |
|
, cv::gimpl::ie::InferList |
|
, cv::gimpl::ie::InferList2 |
|
>(); |
|
} |
|
|
|
virtual bool controlsMerge() const override { |
|
return true; |
|
} |
|
|
|
virtual bool allowsMerge(const cv::gimpl::GIslandModel::Graph &, |
|
const ade::NodeHandle &, |
|
const ade::NodeHandle &, |
|
const ade::NodeHandle &) const override { |
|
return false; |
|
} |
|
}; |
|
} |
|
|
|
// Returns the IE backend handle. The handle is created once, on the first
// call (function-local static); every call returns a copy of it.
cv::gapi::GBackend cv::gapi::ie::backend() {
    static cv::gapi::GBackend ie_backend(std::make_shared<GIEBackendImpl>());
    return ie_backend;
}
|
|
|
// Builds a cv::Mat header over the memory of an IE blob: dimensions and
// element type are converted from the blob's tensor description.
// NOTE(review): the cv::Mat constructor taking a data pointer presumably
// does not copy the data, so the blob must outlive the result -- confirm.
cv::Mat cv::gapi::ie::util::to_ocv(IE::Blob::Ptr blob) {
    const auto &tensor_desc = blob->getTensorDesc();
    const auto  mat_dims    = toCV(tensor_desc.getDims());
    const auto  mat_type    = toCV(tensor_desc.getPrecision());
    return cv::Mat(mat_dims, mat_type, blob->buffer().as<uint8_t*>());
}
|
|
|
// Converts IE dimensions to the std::vector<int> form used on the OpenCV
// side; a public wrapper over the file's toCV() helper.
std::vector<int> cv::gapi::ie::util::to_ocv(const IE::SizeVector &dims) {
    std::vector<int> cv_dims = toCV(dims);
    return cv_dims;
}
|
|
|
// Wraps a cv::Mat into an IE blob, treating the matrix as an image
// (TraitAs::IMAGE).
IE::Blob::Ptr cv::gapi::ie::util::to_ie(const cv::Mat &blob) {
    IE::Blob::Ptr image_blob = wrapIE(blob, cv::gapi::ie::TraitAs::IMAGE);
    return image_blob;
}
|
|
|
// Builds an NV12 blob out of two planes: both the Y and the UV matrices are
// wrapped as images (TraitAs::IMAGE) and then combined into one NV12Blob.
IE::Blob::Ptr cv::gapi::ie::util::to_ie(const cv::Mat &y_plane, const cv::Mat &uv_plane) {
    // The NV12 blob type is spelled differently depending on the IE release.
#if INF_ENGINE_RELEASE >= 2021010000
    using NV12BlobType = IE::NV12Blob;
#else
    using NV12BlobType = InferenceEngine::NV12Blob;
#endif
    auto luma_blob   = wrapIE(y_plane,  cv::gapi::ie::TraitAs::IMAGE);
    auto chroma_blob = wrapIE(uv_plane, cv::gapi::ie::TraitAs::IMAGE);
    return IE::make_shared_blob<NV12BlobType>(luma_blob, chroma_blob);
}
|
|
|
#else // HAVE_INF_ENGINE |
|
|
|
// Stub used when G-API is built without OpenVINO IE: the symbol is still
// provided to avoid linking issues, but calling it only reports an error
// (no value is ever returned).
cv::gapi::GBackend cv::gapi::ie::backend() {
    util::throw_error(
        std::runtime_error("G-API has been compiled without OpenVINO IE support"));
}
|
#endif // HAVE_INF_ENGINE
|
|
|