Merge pull request #24938 from DariaMityagina:icv/dm/add-media-frame-support-to-govbackend

G-API OV backend requires cv::MediaFrame #24938

### Pull Request Readiness Checklist

**Background_subtraction demo G-API issue. Update:**

Porting to API20 resulted in an error (both for CPU and NPU):
```
[ERROR] OpenCV(4.9.0-dev) /home/runner/work/open_model_zoo/open_model_zoo/cache/opencv/modules/gapi/src/backends/ov/govbackend.cpp:813: error: (-215:Assertion failed) cv::util::holds_alternative<cv::GMatDesc>(input_meta) in function 'cfgPreProcessing'
```

Adding cv::MediaFrame support to govbackend resulted in the following (tested with CPU):
<img width="941" alt="image" src="https://github.com/opencv/opencv/assets/52502732/3a003d61-bda7-4b1e-9117-3410cda1ba32">

### TODO

- [ ] **As part of the review process [this comment](https://github.com/opencv/opencv/pull/24938#discussion_r1487694043) was addressed, which makes it impossible to run the demo. I will bring those changes back in a separate PR [support `PartialShape`]**

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/25486/head
Daria Mityagina 7 months ago committed by GitHub
parent 7e56908306
commit ebea65777f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 166
      modules/gapi/src/backends/ov/govbackend.cpp
  2. 166
      modules/gapi/test/infer/gapi_infer_ov_tests.cpp

@ -129,7 +129,7 @@ static int toCV(const ov::element::Type &type) {
static void copyFromOV(const ov::Tensor &tensor, cv::Mat &mat) {
const auto total = mat.total() * mat.channels();
if (toCV(tensor.get_element_type()) != mat.depth() ||
tensor.get_size() != total ) {
tensor.get_size() != total) {
std::stringstream ss;
ss << "Failed to copy data from ov::Tensor to cv::Mat."
<< " Data type or number of elements mismatch."
@ -151,6 +151,30 @@ static void copyFromOV(const ov::Tensor &tensor, cv::Mat &mat) {
}
}
// Exposes a MediaFrame view as a cv::Mat. BGR and GRAY planes are
// wrapped in place (no copy); NV12 is converted to a BGR cv::Mat via
// cvtColorTwoPlane. Throws for any other media format.
cv::Mat wrapOV(const cv::MediaFrame::View& view,
               const cv::GFrameDesc& desc) {
    cv::Mat result;
    switch (desc.fmt) {
        case cv::MediaFormat::BGR:
            result = cv::Mat(desc.size, CV_8UC3, view.ptr[0], view.stride[0]);
            break;
        case cv::MediaFormat::GRAY:
            result = cv::Mat(desc.size, CV_8UC1, view.ptr[0], view.stride[0]);
            break;
        case cv::MediaFormat::NV12: {
            const cv::Mat luma(desc.size, CV_8UC1, view.ptr[0], view.stride[0]);
            // NB: the UV plane is half-resolution, two interleaved channels.
            const cv::Mat chroma(desc.size / 2, CV_8UC2, view.ptr[1], view.stride[1]);
            cvtColorTwoPlane(luma, chroma, result, cv::COLOR_YUV2BGR_NV12);
            break;
        }
        default:
            GAPI_Error("OV Backend: Unsupported media format");
    }
    return result;
}
static void copyToOV(const cv::Mat &mat, ov::Tensor &tensor) {
// TODO: Ideally there should be check that mat and tensor
// dimensions are compatible.
@ -177,6 +201,12 @@ static void copyToOV(const cv::Mat &mat, ov::Tensor &tensor) {
}
}
// Copies a MediaFrame's pixels into an OV tensor: acquire a read view,
// expose it as a cv::Mat via wrapOV, and reuse the Mat-based overload.
static void copyToOV(const cv::MediaFrame &frame, ov::Tensor &tensor) {
    const cv::MediaFrame::View view(frame.access(cv::MediaFrame::Access::R));
    const cv::Mat wrapped = wrapOV(view, frame.desc());
    copyToOV(wrapped, tensor);
}
// Public utility: converts an OpenVINO shape to an OpenCV-style dims
// vector. Thin wrapper over the file-local toCV(::ov::Shape) helper.
std::vector<int> cv::gapi::ov::util::to_ocv(const ::ov::Shape &shape) {
return toCV(shape);
}
@ -269,8 +299,9 @@ public:
}
// Syntax sugar
cv::GShape inShape(std::size_t input) const;
const cv::Mat& inMat (std::size_t input) const;
cv::GShape inShape (std::size_t input) const;
const cv::Mat& inMat (std::size_t input) const;
const cv::MediaFrame& inFrame (std::size_t input) const;
cv::GRunArgP output (std::size_t idx);
cv::Mat& outMatR(std::size_t idx);
@ -355,6 +386,10 @@ const cv::Mat& OVCallContext::inMat(std::size_t input) const {
return inArg<cv::Mat>(input);
}
// Typed accessor: the input argument at index 'input' interpreted as a
// cv::MediaFrame (delegates extraction to inArg<>).
const cv::MediaFrame& OVCallContext::inFrame(std::size_t input) const {
return inArg<cv::MediaFrame>(input);
}
// Mutable reference to the output cv::Mat stored in result slot 'idx';
// m_results holds a cv::Mat* variant alternative for GMat outputs.
cv::Mat& OVCallContext::outMatR(std::size_t idx) {
return *cv::util::get<cv::Mat*>(m_results.at(idx));
}
@ -394,6 +429,8 @@ cv::GArg OVCallContext::packArg(const cv::GArg &arg) {
// (and constructed by either bindIn/Out or resetInternal)
case cv::GShape::GOPAQUE: return cv::GArg(m_res.slot<cv::detail::OpaqueRef>().at(ref.id));
case cv::GShape::GFRAME: return cv::GArg(m_res.slot<cv::MediaFrame>()[ref.id]);
default:
cv::util::throw_error(std::logic_error("Unsupported GShape type"));
break;
@ -655,6 +692,19 @@ void PostOutputsList::operator()(::ov::InferRequest &infer_request,
}
}
// Copies the input at 'input_idx' into 'tensor', dispatching on the
// input's dynamic shape kind (cv::Mat vs cv::MediaFrame).
static void copyToOV(std::shared_ptr<OVCallContext> ctx, uint32_t input_idx, ov::Tensor &tensor) {
    switch (ctx->inShape(input_idx)) {
        case cv::GShape::GMAT:
            copyToOV(ctx->inMat(input_idx), tensor);
            break;
        case cv::GShape::GFRAME:
            copyToOV(ctx->inFrame(input_idx), tensor);
            break;
        default:
            // FIX: was GAPI_Assert("literal"), which always passes (a string
            // literal is a non-null pointer) and silently ignored the error.
            GAPI_Error("Unsupported input shape for OV backend");
    }
}
namespace cv {
namespace gimpl {
namespace ov {
@ -730,6 +780,37 @@ static cv::Mat preprocess(const cv::Mat &in_mat,
return out;
}
// NB: MediaFrame overload used by the InferROI, InferList and
// InferList2 kernels: materialize the frame view as a cv::Mat and
// forward to the Mat-based preprocess() above.
cv::Mat preprocess(MediaFrame::View& view,
                   const cv::GFrameDesc& desc,
                   const cv::Rect& roi,
                   const ::ov::Shape &model_shape) {
    const cv::Mat frame_mat = wrapOV(view, desc);
    return preprocess(frame_mat, roi, model_shape);
}
// Preprocesses the input at 'input_idx' (crop to 'roi', fit the model
// input shape) and copies the result into 'tensor'. Supports both
// cv::Mat and cv::MediaFrame inputs.
static void preprocess_and_copy(std::shared_ptr<OVCallContext> ctx,
                                uint32_t input_idx,
                                const cv::Rect &roi,
                                const ::ov::Shape &model_shape,
                                ::ov::Tensor& tensor) {
    switch (ctx->inShape(input_idx)) {
        case cv::GShape::GMAT: {
            auto roi_mat = preprocess(ctx->inMat(input_idx), roi, model_shape);
            copyToOV(roi_mat, tensor);
            break;
        }
        case cv::GShape::GFRAME: {
            auto currentFrame = ctx->inFrame(input_idx);
            auto view = cv::MediaFrame::View(currentFrame.access(cv::MediaFrame::Access::R));
            auto roi_mat = preprocess(view, currentFrame.desc(), roi, model_shape);
            copyToOV(roi_mat, tensor);
            break; // FIX: was missing -- the GFRAME case fell through to default
        }
        default:
            // FIX: was GAPI_Assert("literal"), which always passes and
            // silently ignored unsupported shapes.
            GAPI_Error("Unsupported input shape for OV backend");
    }
}
static bool isImage(const cv::GMatDesc &desc,
const ::ov::Shape &model_shape) {
return (model_shape.size() == 4u) &&
@ -739,6 +820,16 @@ static bool isImage(const cv::GMatDesc &desc,
(desc.depth == CV_8U);
}
// Meta-level image check: MediaFrame inputs are always treated as
// images; GMat inputs are validated against the model's input shape.
static bool isImage(const cv::GMetaArg &meta,
                    const ::ov::Shape &shape) {
    if (cv::util::holds_alternative<GFrameDesc>(meta)) {
        return true;
    }
    GAPI_Assert(cv::util::holds_alternative<GMatDesc>(meta));
    return isImage(cv::util::get<GMatDesc>(meta), shape);
}
class PrePostProcWrapper {
public:
PrePostProcWrapper(std::shared_ptr<::ov::Model> &model,
@ -821,9 +912,8 @@ public:
void cfgPreProcessing(const std::string &input_name,
const cv::GMetaArg &input_meta,
const bool disable_img_resize = false) {
GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(input_meta));
const auto &matdesc = cv::util::get<cv::GMatDesc>(input_meta);
GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(input_meta) ||
cv::util::holds_alternative<cv::GFrameDesc>(input_meta));
const auto explicit_in_tensor_layout = lookUp(m_input_tensor_layout, input_name);
const auto explicit_in_model_layout = lookUp(m_input_model_layout, input_name);
const auto explicit_resize = lookUp(m_interpolation, input_name);
@ -838,24 +928,35 @@ public:
const auto &input_shape = m_model->input(input_name).get_shape();
auto &input_info = m_ppp.input(input_name);
m_ppp.input(input_name).tensor().set_element_type(toOV(matdesc.depth));
if (isImage(matdesc, input_shape)) {
auto isMat = cv::util::holds_alternative<cv::GMatDesc>(input_meta);
auto prec = isMat ? cv::util::get<cv::GMatDesc>(input_meta).depth : CV_8U;
m_ppp.input(input_name).tensor().set_element_type(toOV(prec));
const auto &matdesc = isMat ? cv::util::get<cv::GMatDesc>(input_meta) : cv::GMatDesc();
const auto &framedesc = !isMat ? cv::util::get<cv::GFrameDesc>(input_meta) : cv::GFrameDesc();
if (isImage(input_meta, input_shape)) {
// NB: Image case - all necessary preprocessng is configured automatically.
GAPI_LOG_DEBUG(NULL, "OV Backend: Input: \"" << input_name << "\" is image.");
if (explicit_in_tensor_layout &&
*explicit_in_tensor_layout != "NHWC") {
if (explicit_in_tensor_layout && *explicit_in_tensor_layout != "NHWC") {
std::stringstream desc_str;
if (isMat) {
desc_str << matdesc;
} else {
desc_str << framedesc;
}
std::stringstream ss;
ss << "OV Backend: Provided tensor layout " << *explicit_in_tensor_layout
<< " is not compatible with input data " << matdesc << " for layer \""
<< input_name << "\". Expecting NHWC";
<< " is not compatible with input data " << desc_str.str() << " for layer \""
<< input_name << "\". Expecting NHWC";
util::throw_error(std::logic_error(ss.str()));
} else {
input_info.tensor().set_layout(::ov::Layout("NHWC"));
}
if (!disable_img_resize) {
input_info.tensor().set_spatial_static_shape(matdesc.size.height,
matdesc.size.width);
const auto size = isMat ? cv::util::get<cv::GMatDesc>(input_meta).size : cv::util::get<cv::GFrameDesc>(input_meta).size;
input_info.tensor().set_spatial_static_shape(size.height,
size.width);
// NB: Even though resize is automatically configured
// user have an opportunity to specify the interpolation algorithm.
auto interp = explicit_resize
@ -877,8 +978,8 @@ public:
if (!explicit_in_tensor_layout && model_layout.empty()) {
std::stringstream ss;
ss << "Resize for input layer: " << input_name
<< "can't be configured."
<< " Failed to extract H and W positions from layout.";
<< "can't be configured."
<< " Failed to extract H and W positions from layout.";
util::throw_error(std::logic_error(ss.str()));
} else {
const auto layout = explicit_in_tensor_layout
@ -982,7 +1083,6 @@ struct Infer: public cv::detail::KernelTag {
ade::util::toRange(in_metas))) {
const auto &input_name = std::get<0>(it);
const auto &mm = std::get<1>(it);
ppp.cfgLayouts(input_name);
ppp.cfgPreProcessing(input_name, mm);
ppp.cfgScaleMean(input_name, mm);
@ -1025,7 +1125,7 @@ struct Infer: public cv::detail::KernelTag {
auto input_tensor = infer_request.get_tensor(input_name);
// TODO: In some cases wrapping existing data pointer
// might be faster than copy. Make it a strategy.
copyToOV(ctx->inMat(i), input_tensor);
copyToOV(ctx, i, input_tensor);
}
},
std::bind(PostOutputs, _1, _2, ctx)
@ -1054,13 +1154,13 @@ struct InferROI: public cv::detail::KernelTag {
const auto &input_name = uu.params.input_names.at(0);
const auto &mm = in_metas.at(1u);
GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(mm));
const auto &matdesc = cv::util::get<cv::GMatDesc>(mm);
GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(mm) ||
cv::util::holds_alternative<cv::GFrameDesc>(mm));
const bool is_model = cv::util::holds_alternative<ParamDesc::Model>(uu.params.kind);
const auto &input_shape = is_model ? uu.model->input(input_name).get_shape()
: uu.compiled_model.input(input_name).get_shape();
if (!isImage(matdesc, input_shape)) {
if (!isImage(mm, input_shape)) {
util::throw_error(std::runtime_error(
"OV Backend: InferROI supports only image as the 1th argument"));
}
@ -1111,8 +1211,7 @@ struct InferROI: public cv::detail::KernelTag {
auto input_tensor = infer_request.get_tensor(input_name);
const auto &shape = input_tensor.get_shape();
const auto &roi = ctx->inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>();
const auto roi_mat = preprocess(ctx->inMat(1), roi, shape);
copyToOV(roi_mat, input_tensor);
preprocess_and_copy(ctx, 1, roi, shape, input_tensor);
},
std::bind(PostOutputs, _1, _2, ctx)
}
@ -1147,11 +1246,11 @@ struct InferList: public cv::detail::KernelTag {
size_t idx = 1u;
for (auto &&input_name : uu.params.input_names) {
const auto &mm = in_metas[idx++];
GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(mm));
const auto &matdesc = cv::util::get<cv::GMatDesc>(mm);
GAPI_Assert(cv::util::holds_alternative<cv::GMatDesc>(mm) ||
cv::util::holds_alternative<cv::GFrameDesc>(mm));
const auto &input_shape = uu.model->input(input_name).get_shape();
if (!isImage(matdesc, input_shape)) {
if (!isImage(mm, input_shape)) {
util::throw_error(std::runtime_error(
"OV Backend: Only image is supported"
" as the " + std::to_string(idx) + "th argument for InferList"));
@ -1208,8 +1307,7 @@ struct InferList: public cv::detail::KernelTag {
const auto &input_name = ctx->uu.params.input_names[0];
auto input_tensor = infer_request.get_tensor(input_name);
const auto &shape = input_tensor.get_shape();
const auto roi_mat = preprocess(ctx->inMat(1), rc, shape);
copyToOV(roi_mat, input_tensor);
preprocess_and_copy(ctx, 1, rc, shape, input_tensor);
},
std::bind(callback, std::placeholders::_1, std::placeholders::_2, pos)
}
@ -1247,12 +1345,18 @@ struct InferList2: public cv::detail::KernelTag {
const auto &input_name_0 = uu.params.input_names.front();
const auto &mm_0 = in_metas[0u];
const auto &matdesc = cv::util::get<cv::GMatDesc>(mm_0);
if (!(cv::util::holds_alternative<cv::GMatDesc>(mm_0) ||
cv::util::holds_alternative<cv::GFrameDesc>(mm_0))) {
util::throw_error(std::runtime_error(
"OV Backend: Unsupported input meta"
" for 0th argument in OV backend"));
}
const bool is_model = cv::util::holds_alternative<ParamDesc::Model>(uu.params.kind);
const auto &input_shape = is_model ? uu.model->input(input_name_0).get_shape()
: uu.compiled_model.input(input_name_0).get_shape();
if (!isImage(matdesc, input_shape)) {
if (!isImage(mm_0, input_shape)) {
util::throw_error(std::runtime_error(
"OV Backend: InferList2 supports only image as the 0th argument"));
}

@ -319,8 +319,174 @@ struct TestAgeGenderListOV : public BaseAgeGenderOV {
}
};
// Minimal MediaFrame adapter exposing a single interleaved BGR plane
// backed by a cv::Mat; the optional callback is forwarded to each View.
class TestMediaBGR final: public cv::MediaFrame::IAdapter {
    cv::Mat m_mat;
    using Cb = cv::MediaFrame::View::Callback;
    Cb m_cb;
public:
    explicit TestMediaBGR(cv::Mat m, Cb cb = [](){})
        : m_mat(m), m_cb(cb) {
    }
    cv::GFrameDesc meta() const override {
        const cv::Size frame_size(m_mat.cols, m_mat.rows);
        return cv::GFrameDesc{cv::MediaFormat::BGR, frame_size};
    }
    cv::MediaFrame::View access(cv::MediaFrame::Access) override {
        // Single plane: only ptr[0]/stride[0] are meaningful for BGR.
        cv::MediaFrame::View::Ptrs ptrs = { m_mat.ptr(), nullptr, nullptr, nullptr };
        cv::MediaFrame::View::Strides strides = { m_mat.step, 0u, 0u, 0u };
        return cv::MediaFrame::View(std::move(ptrs), std::move(strides), Cb{m_cb});
    }
};
// Fixture for the MediaFrame-based age/gender tests: locates the model
// files and prepares random BGR and NV12 (Y + interleaved UV) inputs.
struct MediaFrameTestAgeGenderOV: public ::testing::Test {
    MediaFrameTestAgeGenderOV() {
        initDLDTDataPath();
        xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false);
        bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false);
        device = "CPU";
        blob_path = "age-gender-recognition-retail-0013.blob";
        cv::Size sz{62, 62};
        // FIX: m_in_mat was left uninitialized and self-resized to its own
        // size (a no-op); fill it with random data like the NV12 planes.
        m_in_mat = cv::Mat(sz, CV_8UC3);
        cv::randu(m_in_mat, 0, 255);
        m_in_y = cv::Mat{sz, CV_8UC1};
        cv::randu(m_in_y, 0, 255);
        // NV12 chroma plane: half resolution, two interleaved channels.
        m_in_uv = cv::Mat{sz / 2, CV_8UC2};
        cv::randu(m_in_uv, 0, 255);
    }

    cv::Mat m_in_y;           // NV12 luma plane
    cv::Mat m_in_uv;          // NV12 chroma plane
    cv::Mat m_in_mat;         // BGR input
    cv::Mat m_out_ov_age;     // reference (plain OpenVINO) outputs
    cv::Mat m_out_ov_gender;
    cv::Mat m_out_gapi_age;   // G-API outputs under test
    cv::Mat m_out_gapi_gender;

    std::string xml_path;
    std::string bin_path;
    std::string blob_path;
    std::string device;
    std::string image_path;

    using AGInfo = std::tuple<cv::GMat, cv::GMat>;
    G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "typed-age-gender");

    // Compares G-API outputs against the reference OpenVINO outputs.
    void validate() {
        normAssert(m_out_ov_age, m_out_gapi_age, "0: Test age output");
        normAssert(m_out_ov_gender, m_out_gapi_gender, "0: Test gender output");
    }
}; // MediaFrameTestAgeGenderOV
} // anonymous namespace
// Checks that typed infer<> over a cv::GFrame (BGR MediaFrame) matches
// the reference OpenVINO run on the same cv::Mat data.
TEST_F(MediaFrameTestAgeGenderOV, InferMediaInputBGR)
{
// OpenVINO reference: u8/NHWC tensor input
AGNetOVComp ref(xml_path, bin_path, device);
ref.cfgPrePostProcessing([](ov::preprocess::PrePostProcessor &ppp) {
ppp.input().tensor().set_element_type(ov::element::u8);
ppp.input().tensor().set_layout("NHWC");
});
ref.compile()(m_in_mat, m_out_ov_age, m_out_ov_gender);
// G-API: same data fed through a MediaFrame adapter
cv::GFrame in;
cv::GMat age, gender;
std::tie(age, gender) = cv::gapi::infer<AgeGender>(in);
cv::GComputation comp{cv::GIn(in), cv::GOut(age, gender)};
auto frame = MediaFrame::Create<TestMediaBGR>(m_in_mat);
auto pp = cv::gapi::ov::Params<AgeGender> {
xml_path, bin_path, device
}.cfgOutputLayers({ "age_conv3", "prob" });
comp.apply(cv::gin(frame),
cv::gout(m_out_gapi_age, m_out_gapi_gender),
cv::compile_args(cv::gapi::networks(pp)));
validate();
}
// Checks generic infer with an ROI over a BGR MediaFrame input against
// the reference OpenVINO run on the same data and ROI.
TEST_F(MediaFrameTestAgeGenderOV, InferROIGenericMediaInputBGR) {
// Test data
cv::Rect roi(cv::Rect(cv::Point{20, 25}, cv::Size{16, 16}));
auto frame = MediaFrame::Create<TestMediaBGR>(m_in_mat);
static constexpr const char* tag = "age-gender-generic";
// OpenVINO reference: u8/NHWC tensor input
AGNetOVComp ref(xml_path, bin_path, device);
ref.cfgPrePostProcessing([](ov::preprocess::PrePostProcessor &ppp) {
ppp.input().tensor().set_element_type(ov::element::u8);
ppp.input().tensor().set_layout("NHWC");
});
ref.compile()(m_in_mat, roi, m_out_ov_age, m_out_ov_gender);
// G-API: generic infer over (frame, roi)
cv::GFrame in;
cv::GOpaque<cv::Rect> rr;
GInferInputs inputs;
inputs["data"] = in;
auto outputs = cv::gapi::infer<cv::gapi::Generic>(tag, rr, inputs);
auto age = outputs.at("age_conv3");
auto gender = outputs.at("prob");
cv::GComputation comp{cv::GIn(in, rr), cv::GOut(age, gender)};
auto pp = AGNetROIGenComp::params(xml_path, bin_path, device);
comp.apply(cv::gin(frame, roi), cv::gout(m_out_gapi_age, m_out_gapi_gender),
cv::compile_args(cv::gapi::networks(pp)));
validate();
}
// MediaFrame adapter exposing an NV12 frame as two planes: a full-size
// luma (Y) cv::Mat and a half-size interleaved chroma (UV) cv::Mat.
class TestMediaNV12 final: public cv::MediaFrame::IAdapter {
    cv::Mat m_y;
    cv::Mat m_uv;
public:
    TestMediaNV12(cv::Mat y, cv::Mat uv) : m_y(y), m_uv(uv) {
    }
    cv::GFrameDesc meta() const override {
        const cv::Size frame_size(m_y.cols, m_y.rows);
        return cv::GFrameDesc{cv::MediaFormat::NV12, frame_size};
    }
    cv::MediaFrame::View access(cv::MediaFrame::Access) override {
        // Two planes: Y in slot 0, interleaved UV in slot 1.
        cv::MediaFrame::View::Ptrs ptrs = {
            m_y.ptr(), m_uv.ptr(), nullptr, nullptr
        };
        cv::MediaFrame::View::Strides strides = {
            m_y.step, m_uv.step, 0u, 0u
        };
        return cv::MediaFrame::View(std::move(ptrs), std::move(strides));
    }
};
// Smoke test: generic infer with an ROI over an NV12 MediaFrame input
// must execute without throwing (no reference comparison here).
TEST_F(MediaFrameTestAgeGenderOV, TestMediaNV12AgeGenderOV)
{
cv::GFrame in;
cv::GOpaque<cv::Rect> rr;
GInferInputs inputs;
inputs["data"] = in;
static constexpr const char* tag = "age-gender-generic";
auto outputs = cv::gapi::infer<cv::gapi::Generic>(tag, rr, inputs);
auto age = outputs.at("age_conv3");
auto gender = outputs.at("prob");
cv::GComputation comp{cv::GIn(in, rr), cv::GOut(age, gender)};
// NV12 frame built from the fixture's random Y and UV planes
auto frame = MediaFrame::Create<TestMediaNV12>(m_in_y, m_in_uv);
auto pp = AGNetROIGenComp::params(xml_path, bin_path, device);
cv::Rect roi(cv::Rect(cv::Point{20, 25}, cv::Size{16, 16}));
EXPECT_NO_THROW(comp.apply(cv::gin(frame, roi),
cv::gout(m_out_gapi_age, m_out_gapi_gender),
cv::compile_args(cv::gapi::networks(pp))));
}
// TODO: Make all of tests below parmetrized to avoid code duplication
TEST_F(TestAgeGenderOV, Infer_Tensor) {
const auto in_mat = getRandomTensor({1, 3, 62, 62}, CV_32F);

Loading…
Cancel
Save