// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018-2019, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

#include "precomp.hpp"

#include <fstream>
#include <numeric>

#include "ie_ngraph.hpp"

#include <opencv2/dnn/shape_utils.hpp>

#ifdef HAVE_DNN_NGRAPH
#include <openvino/pass/serialize.hpp>
#endif  // HAVE_DNN_NGRAPH

#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>

#include "opencv2/core/utils/filesystem.hpp"
#include "opencv2/core/utils/filesystem.private.hpp"

namespace cv { namespace dnn {

#ifdef HAVE_DNN_NGRAPH

static bool DNN_IE_SERIALIZE = utils::getConfigurationParameterBool("OPENCV_DNN_IE_SERIALIZE", false);

// For networks with an input layer that has an empty name, IE generates a name like id[some_number].
// OpenCV lets users use an empty input name, so to prevent unexpected naming
// we use a predefined name instead.
static std::string kDefaultInpLayerName = "opencv_ngraph_empty_inp_layer_name";
static constexpr const char* kOpenCVLayersType = "opencv_ngraph_layer";

static std::vector<Ptr<NgraphBackendWrapper> >
ngraphWrappers(const std::vector<Ptr<BackendWrapper> >& ptrs)
{
    std::vector<Ptr<NgraphBackendWrapper> > wrappers(ptrs.size());
    for (int i = 0; i < ptrs.size(); ++i)
    {
        CV_Assert(!ptrs[i].empty());
        wrappers[i] = ptrs[i].dynamicCast<NgraphBackendWrapper>();
        CV_Assert(!wrappers[i].empty());
    }
    return wrappers;
}

class NgraphCustomOp: public ov::op::Op {
public:
    OPENVINO_OP(kOpenCVLayersType);

    NgraphCustomOp(const ov::OutputVector& inputs, Ptr<Layer>& cvLayer,
                   const std::vector<Mat>& outputs, const std::vector<Mat>& internals):
        Op(inputs), cvLayer(cvLayer), outputs(outputs), internals(internals)
    {
        constructor_validate_and_infer_types();
    }

    void validate_and_infer_types() override
    {
        set_output_size(outputs.size());
        for (int i = 0; i < outputs.size(); ++i)
        {
            ov::PartialShape shape;
            for (int j = 0; j < outputs[i].dims; ++j) {
                shape.push_back(outputs[i].size[j]);
            }
            set_output_type(i, get_input_element_type(0), shape);
        }
    }

    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override
    {
        return std::make_shared<NgraphCustomOp>(new_args, cvLayer, outputs, internals);
    }

    bool has_evaluate() const override {
        return true;
    }

    bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override
    {
        std::vector<Mat> inpMats, outMats;
        infEngineBlobsToMats(inputs, inpMats);
        infEngineBlobsToMats(outputs, outMats);
        try
        {
            cvLayer->forward(inpMats, outMats, internals);
            return true;
        }
        catch (...)
        {
            return false;
        }
    }

    Ptr<Layer>& cvLayer;
    std::vector<Mat> outputs, internals;
};
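// InfEngineNgraphNode is the BackendNode implementation for this backend and
// simply stores an ov::Output<ov::Node>. The constructor that takes an OpenCV
// Layer wraps it into the NgraphCustomOp defined above, so layers without a
// native OpenVINO counterpart are still executed through Layer::forward()
// when the graph is evaluated.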
InfEngineNgraphNode::InfEngineNgraphNode(ov::Output<ov::Node>&& _node)
    : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) {
    CV_Assert(node.get_node());
    CV_Assert(node.get_node_shared_ptr());
}

InfEngineNgraphNode::InfEngineNgraphNode(const ov::Output<ov::Node>& _node)
    : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) {
    CV_Assert(node.get_node());
    CV_Assert(node.get_node_shared_ptr());
}

InfEngineNgraphNode::InfEngineNgraphNode(const std::vector<Ptr<BackendNode> >& nodes,
                                         Ptr<Layer>& cvLayer_, std::vector<Mat*>& inputs,
                                         std::vector<Mat>& outputs, std::vector<Mat>& internals)
    : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), cvLayer(cvLayer_)
{
    ov::OutputVector inp_nodes;
    for (const auto& node : nodes)
        inp_nodes.emplace_back(node.dynamicCast<InfEngineNgraphNode>()->node);

    node = std::make_shared<NgraphCustomOp>(inp_nodes, cvLayer, outputs, internals);

    CV_Assert(!cvLayer->name.empty());
    setName(cvLayer->name);
}

void InfEngineNgraphNode::setName(const std::string& name) {
    node.get_node()->set_friendly_name(name);
}

InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl)
    : netImpl_(netImpl)
{
    hasNetOwner = false;
    device_name = "CPU";
}

InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, std::shared_ptr<ov::Model>& net)
    : netImpl_(netImpl)
    , cnn(net)
{
    hasNetOwner = true;
    device_name = "CPU";
}

void InfEngineNgraphNet::addOutput(const Ptr<InfEngineNgraphNode>& node)
{
    CV_Assert(node);
    const std::string& name = node->node.get_node()->get_friendly_name();
    requestedOutputs.insert({name, node.get()});
}

void InfEngineNgraphNet::createNet(Target targetId) {
    if (!hasNetOwner)
    {
        CV_Assert(!requestedOutputs.empty());
        ov::ResultVector outs;

        for (auto output_node_it = requestedOutputs.begin(); output_node_it != requestedOutputs.end(); ++output_node_it)
        {
            CV_LOG_DEBUG(NULL, "DNN/NGRAPH: Add 'Result' output: " << output_node_it->first);
            CV_Assert(output_node_it->second);
            auto out = std::make_shared<ov::op::v0::Result>(output_node_it->second->node);
            std::string name = output_node_it->first + (output_node_it->second->node.get_node()->get_output_size() == 1 ? "" : ".0");
            CV_LOG_DEBUG(NULL, "DNN-IE: Change friendly name from " << out->get_friendly_name() << " to " << name);
            out->set_friendly_name(name);
            outs.push_back(out);
        }
        CV_Assert_N(!inputs_vec.empty(), !outs.empty());
        ngraph_function = std::make_shared<ov::Model>(outs, inputs_vec);
        init(targetId);
    }
}
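// init() maps OpenCV DNN targets onto OpenVINO device names (see the switch
// below: CPU, GPU, MYRIAD, HDDL, FPGA). From user code the device is selected
// indirectly; an illustrative snippet:
//
//     net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
//     net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);  // compiled for the "GPU" plugin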
void InfEngineNgraphNet::init(Target targetId)
{
    if (!hasNetOwner)
    {
        if (targetId == DNN_TARGET_OPENCL_FP16)
        {
            ov::pass::ConvertFP32ToFP16().run_on_model(ngraph_function);
        }
        cnn = ngraph_function;

        if (DNN_IE_SERIALIZE)
        {
#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
            std::string dumpFileNameBase = netImpl_.getDumpFileNameBase();
            try
            {
                ov::pass::Serialize(dumpFileNameBase + "_ngraph.xml", dumpFileNameBase + "_ngraph.bin").run_on_model(cnn);
            }
            catch (const std::exception& e)
            {
                std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
                out << "Exception: " << e.what() << std::endl;
            }
            catch (...)
            {
                std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
                out << "Can't dump: unknown exception" << std::endl;
            }
#endif
        }
    }

    switch (targetId)
    {
        case DNN_TARGET_CPU:
            device_name = "CPU";
            break;
        case DNN_TARGET_OPENCL:
        case DNN_TARGET_OPENCL_FP16:
            device_name = "GPU";
            break;
        case DNN_TARGET_MYRIAD:
            device_name = "MYRIAD";
            break;
        case DNN_TARGET_HDDL:
            device_name = "HDDL";
            break;
        case DNN_TARGET_FPGA:
            device_name = "FPGA";
            break;
        default:
            CV_Error(Error::StsNotImplemented, "Unknown target");
    }

    ov::preprocess::PrePostProcessor ppp(cnn);
    int i = 0;
    for (const auto& inp : cnn->inputs())  // TODO: not sure why, but ngraph_function->inputs() here causes a segfault.
    {
        const std::string& name = inp.get_node()->get_friendly_name();
        auto blobIt = allBlobs.find(name);
        CV_Assert(blobIt != allBlobs.end());
        auto srcT = blobIt->second.get_element_type();
        if (srcT != inp.get_node()->get_element_type()) {
            ppp.input(i++).tensor().set_element_type(srcT);
        }
    }

    i = 0;
    for (const auto& it : cnn->outputs())
    {
        const std::string& name = it.get_node()->get_friendly_name();
        auto blobIt = allBlobs.find(name);
        CV_Assert(blobIt != allBlobs.end());
        const auto& src = blobIt->second;

        // A workaround for a single-dimension output for which OpenCV allocates a 2D Mat.
        // For example, face-detection-0105 has a Result of shape {200} while the output blob is {200, 1}.
        auto outShape = it.get_partial_shape().get_max_shape();
        if (outShape != src.get_shape()) {
            size_t sz = std::accumulate(outShape.begin(), outShape.end(), 1, std::multiplies<size_t>());
            CV_Assert(sz == src.get_size());
            allBlobs[name] = ov::Tensor(src.get_element_type(), outShape, src.data());
        }
        ppp.output(i++).tensor().set_element_type(src.get_element_type());
    }

    ppp.build();

    initPlugin(cnn);
}

ov::ParameterVector InfEngineNgraphNet::setInputs(const std::vector<cv::Mat>& inputs,
                                                  const std::vector<std::string>& names)
{
    CV_Assert_N(inputs.size() == names.size());
    ov::ParameterVector current_inp;
    for (size_t i = 0; i < inputs.size(); i++)
    {
        std::vector<size_t> shape = getShape<size_t>(inputs[i]);
        auto inp = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape(shape));
        inp->set_friendly_name(names[i]);

        auto it = std::find_if(inputs_vec.begin(), inputs_vec.end(),
                               [&inp](const std::shared_ptr<ov::op::v0::Parameter>& a) {
                                   return a->get_friendly_name() == inp->get_friendly_name();
                               });
        if (it == inputs_vec.end()) {
            inputs_vec.push_back(inp);
            current_inp.push_back(inp);
        } else {
            current_inp.push_back(*it);
        }
    }
    return current_inp;
}
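// initPlugin() compiles the ov::Model for the selected device. If the graph
// contains NgraphCustomOp nodes (OpenCV-implemented layers) and the target is
// not CPU, the model is loaded through the "HETERO:<device>,CPU" virtual
// device so that those layers fall back to the CPU plugin.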
void InfEngineNgraphNet::initPlugin(std::shared_ptr<ov::Model>& net)
{
    CV_Assert(!isInitialized());

    try
    {
        AutoLock lock(getInitializationMutex());
        ov::Core& ie = getCore(device_name);
        {
            isInit = true;
            std::vector<std::string> candidates;
            std::string param_pluginPath = utils::getConfigurationParameterString("OPENCV_DNN_IE_EXTRA_PLUGIN_PATH", "");
            if (!param_pluginPath.empty())
            {
                candidates.push_back(param_pluginPath);
            }
            bool found = false;
            for (size_t i = 0; i != candidates.size(); ++i)
            {
                const std::string& libName = candidates[i];
                try
                {
                    ie.add_extension(libName);
                    CV_LOG_INFO(NULL, "DNN-IE: Loaded extension plugin: " << libName);
                    found = true;
                    break;
                }
                catch(...) {}
            }
            if (!found && !candidates.empty())
            {
                CV_LOG_WARNING(NULL, "DNN-IE: Can't load extension plugin (extra layers for some networks). Specify path via OPENCV_DNN_IE_EXTRA_PLUGIN_PATH parameter");
            }
#ifndef _WIN32
            // Limit the number of CPU threads.
            if (device_name == "CPU")
                ie.set_property(device_name, ov::inference_num_threads(getNumThreads()));
#endif
            if (device_name.find("GPU") == 0)
            {
#if OPENCV_HAVE_FILESYSTEM_SUPPORT
                std::string cache_path = utils::fs::getCacheDirectory((std::string("dnn_ie_cache_") + device_name).c_str(), "OPENCV_DNN_IE_GPU_CACHE_DIR");
#else
                std::string cache_path = utils::getConfigurationParameterString("OPENCV_DNN_IE_GPU_CACHE_DIR", "");
#endif
                if (!cache_path.empty() && cache_path != "disabled")
                {
                    CV_LOG_INFO(NULL, "OpenCV/nGraph: using GPU kernels cache: " << cache_path);
                    ie.set_property(device_name, ov::cache_dir(cache_path));
                }
            }
        }
        ov::AnyMap config;
        if (device_name == "MYRIAD" || device_name == "HDDL") {
            config.emplace("MYRIAD_DETECT_NETWORK_BATCH", "NO");
        }

        bool isHetero = device_name == "FPGA";
        // This is relevant only for non-CPU targets and networks built at runtime using nGraph.
        // IR models are not checked because they may have a version lower than IRv10.
        if (!isHetero && device_name != "CPU" && !hasNetOwner)
        {
            for (auto& node : net->get_ops())
            {
                if (node->description() == kOpenCVLayersType)
                {
                    isHetero = true;
                    break;
                }
            }
        }

        std::string ieDevice = isHetero ? ("HETERO:" + device_name + ",CPU") : device_name;
        CV_LOG_INFO(NULL, "DNN/IE: Calling LoadNetwork(device=" << ieDevice << ")...");
        netExec = ie.compile_model(net, ieDevice, config);
    }
    catch (const std::exception& ex)
    {
        CV_Error(Error::StsError, format("Failed to initialize Inference Engine backend (device = %s): %s", device_name.c_str(), ex.what()));
    }
}

bool InfEngineNgraphNet::isInitialized()
{
    return isInit;
}

bool NgraphBackendLayer::getMemoryShapes(const std::vector<MatShape> &inputs,
                                         const int requiredOutputs,
                                         std::vector<MatShape> &outputs,
                                         std::vector<MatShape> &internals) const
{
    bool equal_flag = true;
    std::map<std::string, ov::PartialShape> inShapes;
    int i = 0;
    for (const auto& inp : t_net->get_parameters())
    {
        ov::Shape oldShape = inp->get_shape();
        ov::Shape newShape(inputs[i].begin(), inputs[i].end());
        inShapes.insert({inp->get_friendly_name(), newShape});
        if (oldShape != newShape)
        {
            equal_flag = false;
        }
        i++;
    }
    if (!equal_flag)
    {
        std::shared_ptr<ov::Model> curr_t_net(t_net);
        curr_t_net->reshape(inShapes);
    }
    std::vector<size_t> dims;
    for (const auto& it : t_net->outputs()) {
        if (it.get_node()->get_friendly_name() == name) {
            dims = it.get_partial_shape().get_max_shape();
        }
    }
    if (dims.empty())
        CV_Error(Error::StsError, format("Unable to find result with name %s", name.c_str()));
    outputs.push_back(MatShape(dims.begin(), dims.end()));
    return false;
}

bool NgraphBackendLayer::supportBackend(int backendId)
{
    CV_LOG_DEBUG(NULL, "NgraphBackendLayer::supportBackend(" << backendId << ")");
    return backendId == DNN_BACKEND_DEFAULT
        || (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
}

void NgraphBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs,
                                 OutputArrayOfArrays internals)
{
    CV_Error(Error::StsInternal, "Choose Inference Engine as a preferable backend.");
}

ov::Tensor wrapToNgraphBlob(const Mat& m) {
    std::vector<size_t> shape = getShape<size_t>(m);
    if (m.type() == CV_32F)
        return ov::Tensor(ov::element::f32, shape, m.data);
    else if (m.type() == CV_8U)
        return ov::Tensor(ov::element::u8, shape, m.data);
    else if (m.type() == CV_8SC1)
        return ov::Tensor(ov::element::i8, shape, m.data);
    else if (m.type() == CV_32SC1)
        return ov::Tensor(ov::element::i32, shape, m.data);
    else
        CV_Error(Error::StsNotImplemented, format("Unsupported data type %s", typeToString(m.type()).c_str()));
}
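// wrapToNgraphBlob() above creates a zero-copy ov::Tensor that aliases the
// Mat's data pointer, so the wrapped Mat must outlive the tensor. copyBlob()
// further below allocates a fresh tensor of the same type and shape for the
// cases where an independent copy is needed (asynchronous requests).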
NgraphBackendWrapper::NgraphBackendWrapper(int targetId, const cv::Mat& m)
    : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, targetId)
    , host((Mat*)&m)
{
    blob = wrapToNgraphBlob(m);
}

NgraphBackendWrapper::NgraphBackendWrapper(Ptr<BackendWrapper> wrapper)
    : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, wrapper->targetId)
{
    Ptr<NgraphBackendWrapper> ieWrapper = wrapper.dynamicCast<NgraphBackendWrapper>();
    CV_Assert(!ieWrapper.empty());
    name = ieWrapper->name;
    blob = ieWrapper->blob;
}

Ptr<BackendWrapper> NgraphBackendWrapper::create(Ptr<BackendWrapper> wrapper)
{
    return Ptr<BackendWrapper>(new NgraphBackendWrapper(wrapper));
}

NgraphBackendWrapper::~NgraphBackendWrapper()
{
    // nothing
}

void NgraphBackendWrapper::copyToHost()
{
    CV_LOG_DEBUG(NULL, "NgraphBackendWrapper::copyToHost()");
    //CV_Error(Error::StsNotImplemented, "");
}

void NgraphBackendWrapper::setHostDirty()
{
    CV_LOG_DEBUG(NULL, "NgraphBackendWrapper::setHostDirty()");
    //CV_Error(Error::StsNotImplemented, "");
}

ov::Tensor copyBlob(const ov::Tensor& blob)
{
    return ov::Tensor(blob.get_element_type(), blob.get_shape());
}

void InfEngineNgraphNet::reset()
{
    allBlobs.clear();
    infRequests.clear();
    isInit = false;
}

void InfEngineNgraphNet::addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs)
{
    auto wrappers = ngraphWrappers(ptrs);
    for (const auto& wrapper : wrappers)
    {
        std::string name = wrapper->name;
        name = name.empty() ? kDefaultInpLayerName : name;
        allBlobs.insert({name, wrapper->blob});
    }
}

void InfEngineNgraphNet::NgraphReqWrapper::makePromises(const std::vector<Ptr<BackendWrapper> >& outsWrappers)
{
    auto outs = ngraphWrappers(outsWrappers);
    outProms.clear();
    outProms.resize(outs.size());
    outsNames.resize(outs.size());
    for (int i = 0; i < outs.size(); ++i)
    {
        outs[i]->futureMat = outProms[i].getArrayResult();
        outsNames[i] = outs[i]->name;
    }
}

void InfEngineNgraphNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers, bool isAsync)
{
    CV_LOG_DEBUG(NULL, "InfEngineNgraphNet::forward(" << (isAsync ? "async" : "sync") << ")");

    // Look for finished requests.
    Ptr<NgraphReqWrapper> reqWrapper;
    for (auto& wrapper : infRequests)
    {
        if (wrapper->isReady)
        {
            reqWrapper = wrapper;
            break;
        }
    }
    if (reqWrapper.empty())
    {
        reqWrapper = Ptr<NgraphReqWrapper>(new NgraphReqWrapper());
        try
        {
            reqWrapper->req = netExec.create_infer_request();
        }
        catch (const std::exception& ex)
        {
            CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what()));
        }
        infRequests.push_back(reqWrapper);

        int i = 0;
        for (const auto& it : netExec.inputs())
        {
            const std::string& name = it.get_node()->get_friendly_name();
            auto blobIt = allBlobs.find(name);
            if (blobIt == allBlobs.end()) {
                CV_Error(Error::StsAssert, format("Input blob with name %s not found", name.c_str()));
            }
            reqWrapper->req.set_input_tensor(i++, isAsync ? copyBlob(blobIt->second) : blobIt->second);
        }

        i = 0;
        for (const auto& it : cnn->outputs())  // Starting from OpenVINO 2024, CompiledModel changes output friendly names.
        {
            const std::string& name = it.get_node()->get_friendly_name();
            auto blobIt = allBlobs.find(name);
            if (blobIt == allBlobs.end()) {
                CV_Error(Error::StsAssert, format("Output blob with name %s not found", name.c_str()));
            }
            reqWrapper->req.set_output_tensor(i++, isAsync ? copyBlob(blobIt->second) : blobIt->second);
        }

        if (isAsync) {
            bool* isReady = &reqWrapper->isReady;
            auto* promises = &reqWrapper->outProms;
            auto* req = &reqWrapper->req;
            reqWrapper->req.set_callback([isReady, promises, req](std::exception_ptr ex) {
                CV_LOG_DEBUG(NULL, "DNN(nGraph): completionCallback()");

                size_t processedOutputs = 0;
                try
                {
                    for (; processedOutputs < promises->size(); ++processedOutputs)
                    {
                        Mat m = infEngineBlobToMat(req->get_output_tensor(processedOutputs));

                        try
                        {
                            (*promises)[processedOutputs].setValue(m.clone());
                        }
                        catch (...)
                        {
                            try {
                                (*promises)[processedOutputs].setException(std::current_exception());
                            } catch(...) {
                                CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation");
                            }
                        }
                    }
                }
                catch (...)
                {
                    std::exception_ptr e = std::current_exception();
                    for (; processedOutputs < promises->size(); ++processedOutputs)
                    {
                        try {
                            (*promises)[processedOutputs].setException(e);
                        } catch(...) {
                            CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation");
                        }
                    }
                }
                *isReady = true;
            });
        }
    }

    if (isAsync)
    {
        // Copy actual data to the infer request's input blobs.
        int i = 0;
        for (const auto& it : cnn->get_parameters())
        {
            const std::string& name = it->get_friendly_name();
            auto blobIt = allBlobs.find(name);
            Mat srcMat = infEngineBlobToMat(blobIt->second);
            Mat dstMat = infEngineBlobToMat(reqWrapper->req.get_input_tensor(i++));
            srcMat.copyTo(dstMat);
        }

        // Set promises to the output blob wrappers.
        reqWrapper->makePromises(outBlobsWrappers);

        reqWrapper->isReady = false;
        reqWrapper->req.start_async();
    }
    else
    {
        reqWrapper->req.infer();
    }
}
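// ngraphQuantize()/ngraphDequantize() express the affine int8 mapping
// real = scale * (q - zero_point) as a single 256-level FakeQuantize node.
// Worked example with illustrative values scale = 0.1, zero_point = 10:
// the quantized range [-128, 127] corresponds to the real range
// [0.1 * (-128 - 10), 0.1 * (127 - 10)] = [-13.8, 11.7].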
ov::Output<ov::Node> ngraphQuantize(ov::Output<ov::Node> input, float output_sc, float output_zp) {
    float outLow = -128, outHigh = 127;
    float inpLow = output_sc * (outLow - output_zp);
    float inpHigh = output_sc * (outHigh - output_zp);
    return std::make_shared<ov::op::v0::FakeQuantize>(input,
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &inpLow),
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &inpHigh),
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &outLow),
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &outHigh),
        256 // levels
    );
}

ov::Output<ov::Node> ngraphDequantize(ov::Output<ov::Node> input, float input_sc, float input_zp) {
    float inpLow = -128, inpHigh = 127;
    float outLow = input_sc * (inpLow - input_zp);
    float outHigh = input_sc * (inpHigh - input_zp);
    return std::make_shared<ov::op::v0::FakeQuantize>(input,
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &inpLow),
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &inpHigh),
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &outLow),
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{1}, &outHigh),
        256 // levels
    );
}

#endif

}}  // namespace cv::dnn