mirror of https://github.com/opencv/opencv.git
dnn: add the CANN backend (#22634)
* cann backend impl v1 * cann backend impl v2: use opencv parsers to build models for cann * adjust fc according to the new transA and transB * put cann net in cann backend node and reuse forwardLayer * use fork() to create a child process and compile cann model * remove legacy code * remove debug code * fall back to CPU backend if there is one layer not supported by CANN backend * fix netInput forward (pull/22967/head)
parent
a08c98cdfb
commit
a2b3acfc6e
34 changed files with 2208 additions and 28 deletions
@ -0,0 +1,109 @@ |
||||
ocv_check_environment_variables(CANN_INSTALL_DIR)

# Fall back to the toolkit environment variable when CANN_INSTALL_DIR is unset.
if("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
  set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
  message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
endif()

# cann_find_lib(<var> <name> <subdir>)
# Looks for lib<name>.so under ${CANN_INSTALL_DIR}/<subdir> and stores the full
# path in <var> (cache entry: found_<var>). On failure it turns off HAVE_CANN
# and leaves this file.
# NOTE: deliberately a macro, not a function — return() inside a macro returns
# from the scope of the caller (this file), which a function() could not do.
macro(cann_find_lib var_name lib_name lib_subdir)
  set(${var_name} "${CANN_INSTALL_DIR}/${lib_subdir}")
  find_library(found_${var_name} NAMES ${lib_name} PATHS "${${var_name}}" NO_DEFAULT_PATH)
  if(found_${var_name})
    set(${var_name} ${found_${var_name}})
    message(STATUS "CANN: lib${lib_name}.so is found at ${${var_name}}")
  else()
    message(STATUS "CANN: Missing lib${lib_name}.so. Turning off HAVE_CANN")
    set(HAVE_CANN OFF)
    return()
  endif()
endmacro()

if(CANN_INSTALL_DIR)
  # Supported platforms: x86-64, arm64
  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
  else()
    set(HAVE_CANN OFF)
    message(STATUS "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off HAVE_CANN")
    return()
  endif()

  # Supported OS: linux (because we need fork() to build models in a child process)
  # done via checks in cann.cpp
  # FIXME: remove the check if a better model building solution is found

  # include
  set(incs_cann "${CANN_INSTALL_DIR}/include")
  list(APPEND incs_cann "${CANN_INSTALL_DIR}/opp")

  # libs (keep this order: it determines the STATUS message order)
  cann_find_lib(lib_ascendcl    ascendcl    "acllib/lib64")
  cann_find_lib(lib_graph       graph       "compiler/lib64")
  cann_find_lib(lib_ge_compiler ge_compiler "compiler/lib64")
  cann_find_lib(lib_opsproto    opsproto    "opp/op_proto/built-in")

  set(libs_cann "")
  list(APPEND libs_cann ${lib_ascendcl})
  list(APPEND libs_cann ${lib_opsproto})
  list(APPEND libs_cann ${lib_graph})
  list(APPEND libs_cann ${lib_ge_compiler})

  # Smoke test: compile and link a minimal program against the toolkit.
  try_compile(VALID_ASCENDCL
      "${OpenCV_BINARY_DIR}"
      "${OpenCV_SOURCE_DIR}/cmake/checks/cann.cpp"
      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${incs_cann}"
                  "-DLINK_LIBRARIES:STRING=${libs_cann}"
      OUTPUT_VARIABLE ASCEND_TRY_OUT)

  # NOTE: test the variable name, not ${VALID_ASCENDCL} — the old double
  # dereference breaks when the result is an empty string.
  if(NOT VALID_ASCENDCL)
    message(WARNING "Cannot use CANN")
    set(HAVE_CANN OFF)
    return()
  endif()

  set(HAVE_CANN ON)
endif()

if(HAVE_CANN)
  set(CANN_INCLUDE_DIRS ${incs_cann})
  set(CANN_LIBRARIES ${libs_cann})
  ocv_add_external_target(cann "${CANN_INCLUDE_DIRS}" "${CANN_LIBRARIES}" "HAVE_CANN")
  ocv_warnings_disable(CMAKE_C_FLAGS -Wignored-qualifiers)
  ocv_warnings_disable(CMAKE_CXX_FLAGS -Wignored-qualifiers)
endif()

mark_as_advanced(
  incs_cann
  libs_cann
  lib_ascendcl
  lib_graph
  lib_ge_compiler
  lib_opsproto
)
@ -0,0 +1,20 @@ |
||||
#include <acl/acl.h>
#include <unistd.h> // fork()
#include <iostream>

// Minimal configure-time check: the CANN runtime can be initialized and
// finalized. Always exits 0; failures are only reported on stderr.
int main(int /*argc*/, char** /*argv*/)
{
    const int init_ret = aclInit(NULL);
    if (init_ret != 0)
    {
        std::cerr << "Failed to initialize Ascend, ret = " << init_ret;
    }

    const int fini_ret = aclFinalize();
    if (fini_ret != 0)
    {
        std::cerr << "Failed to de-initialize Ascend, ret = " << fini_ret;
    }

    return 0;
}
@ -0,0 +1,348 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#include <opencv2/core/utils/logger.hpp> |
||||
|
||||
#include "net_impl.hpp" |
||||
|
||||
namespace cv { namespace dnn { |
||||
CV__DNN_INLINE_NS_BEGIN |
||||
|
||||
#ifdef HAVE_CANN |
||||
|
||||
static std::shared_ptr<ge::ModelBufferData> compileCannGraph(std::shared_ptr<ge::Graph> graph);

// Net::Impl specialization that runs the whole network through the CANN
// graph engine on an Ascend NPU. It wraps an existing Impl (basePtr_) and
// copies its layer tables, forcing backend=CANN and target=NPU.
class NetImplCann CV_FINAL : public Net::Impl
{
public:
    typedef Net::Impl Base;

    bool newWasSupported, netWasConverted;  // conversion status flags used by initBackend()

    explicit NetImplCann(const Ptr<Net::Impl>& basePtr)
        : Net::Impl()
    {
        CV_LOG_INFO(NULL, "Initializing NetImplCann");
        basePtr_ = basePtr;
        newWasSupported = true;
        netWasConverted = false;

        init();

        CV_LOG_INFO(NULL, "Finished initializing NetImplCann");
    }

    // Copy the network description from the wrapped Impl. The base impl must
    // not be allocated or quantized yet; per-layer allocation state is reset.
    void init()
    {
        CV_TRACE_FUNCTION();
        CV_Assert(basePtr_);
        Net::Impl& base = *basePtr_;
        CV_Assert(!base.netWasAllocated);
        CV_Assert(!base.netWasQuantized); // does not support quantized net for now
        netInputLayer = base.netInputLayer;
        blobsToKeep = base.blobsToKeep;
        layers = base.layers;
        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
        {
            LayerData& ld = it->second;
            ld.resetAllocation();
        }
        layerNameToId = base.layerNameToId;
        outputNameToId = base.outputNameToId;
        preferableBackend = DNN_BACKEND_CANN;
        preferableTarget = DNN_TARGET_NPU; // force using NPU
        hasDynamicShapes = base.hasDynamicShapes;
        CV_Assert(base.backendWrappers.empty()); //backendWrappers = base.backendWrappers;
        lastLayerId = base.lastLayerId;
        netWasAllocated = base.netWasAllocated;
        netWasQuantized = base.netWasQuantized;
        fusion = base.fusion;
    }

    bool empty() const override
    {
        return Base::empty();
    }

    // Switching away from CANN is rejected.
    void setPreferableBackend(Net& net, int backendId) override
    {
        if (backendId == preferableBackend)
            return; // no-op
        else
            CV_Error(Error::StsError, "DNN: Can't switch backend from CANN to other");
        // NOTE(review): the lines below are unreachable — both branches above
        // exit (return or throw). Presumably they were meant to restore the
        // wrapped impl; confirm intent before enabling.
        Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
        impl_ptr_ref = basePtr_;
        basePtr_->setPreferableBackend(net, backendId);
    }

    // Only DNN_TARGET_NPU is accepted once this impl is active.
    void setPreferableTarget(int targetId) override
    {
        if (targetId != preferableTarget)
        {
            CV_Error(Error::StsError, "DNN: Can't switch target from NPU to other");
        }
    }

    Ptr<BackendWrapper> wrap(Mat& host) override
    {
        return Ptr<BackendWrapper>(new CannBackendWrapper(host));
    }

    // void fuseLayers(const std::vector<LayerPin>& blobsToKeep_); // fusion is done in the CANN graph engine

    void initBackend(const std::vector<LayerPin>& blobsToKeep_) override;

    void forwardLayer(LayerData& ld) override;
};
||||
|
||||
// Convert the whole dnn graph to one CANN (ge) graph, compile it to an
// offline-model (OM) buffer, load it into a CannNet and attach that net to the
// first output node. All converted layers are marked skip=true; only the node
// holding the CannNet actually runs in forwardLayer().
void NetImplCann::initBackend(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();
    CV_CheckEQ(preferableBackend, DNN_BACKEND_CANN, "");

    // netWasAllocated turns to false if requested output is changed or input shape changes
    if (netWasConverted && netWasAllocated)
        return;

    if (!netWasConverted)
    {
        newWasSupported = true;
        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
        {
            auto& ld = it->second;
            auto layer = ld.layerInstance;
            // id 0 is the network-input pseudo layer; it has no CANN operator
            if (ld.id != 0 && !layer->supportBackend(preferableBackend))
            {
                newWasSupported = false;
                CV_LOG_INFO(NULL, "DNN/CANN: layer (name=" << ld.name << ", type=" << ld.type << ") is not supported by CANN backend. Going back to CPU backend");
            }
        }
    }
    // leave everything unconverted: caller falls back to the CPU path
    if (!newWasSupported)
        return ;

    // convert layers to CANN operators,
    // collect graph input and output operators,
    // collect and input and output wrappers
    int firstOutputLayerId = -1;
    std::vector<Ptr<BackendNode> > netInputNodes;
    std::vector<ge::Operator> graphInputOps, graphOutputOps;
    std::vector<Ptr<BackendWrapper>> graphInputWrappers, graphOutputWrappers;
    CV_LOG_INFO(NULL, "DNN/CANN: converting layers to CANN operators");
    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
    {
        LayerData& ld = it->second;
        auto layer = ld.layerInstance;

        if (ld.id == 0)
        {
            // network input: one ge::op::Data operator per input blob
            for (int i = 0; i < ld.outputBlobsWrappers.size(); i++)
            {
                std::string inputName = netInputLayer->outNames.empty() ? cv::format("%s_%d", ld.name.c_str(), i) : netInputLayer->outNames[i];
                auto inputOp = std::make_shared<ge::op::Data>(inputName);

                // retrieve tensor description
                auto wrapper = ld.outputBlobsWrappers[i];
                graphInputWrappers.push_back(wrapper);
                auto cannWrapper = wrapper.dynamicCast<CannBackendWrapper>();
                CV_Assert(!cannWrapper.empty());

                inputOp->update_input_desc_x(*(cannWrapper->desc_));
                inputOp->update_output_desc_y(*(cannWrapper->desc_));

                graphInputOps.push_back(*inputOp);
                netInputNodes.push_back(Ptr<BackendNode>(new CannBackendNode(inputOp)));
            }
        }
        else
        {
            ld.skip = true; // skip all cann operators

            // gather this layer's input nodes (either net inputs or earlier layers)
            std::vector<Ptr<BackendNode> > layerInputNodes;
            for (int i = 0; i < ld.inputBlobsId.size(); i++)
            {
                int layerInputLid = ld.inputBlobsId[i].lid;
                int layerInputOid = ld.inputBlobsId[i].oid;
                if (layerInputLid == 0)
                {
                    layerInputNodes.push_back(netInputNodes[layerInputOid]);
                }
                else // here we do not consider an op with multiple outputs
                {
                    layerInputNodes.push_back(layers[layerInputLid].backendNodes[preferableBackend]);
                }
            }

            CV_LOG_INFO(NULL, "DNN/CANN: converting layer " << ld.name << "@" << ld.type << "@" << ld.id << " to CANN operator");
            auto backendNode = layer->initCann(ld.inputBlobsWrappers, ld.id, layerInputNodes);

            // collect outputs
            bool isOutputNode = ld.consumers.size() == 0 ? true : false;
            if (isOutputNode)
            {
                if (firstOutputLayerId < 0)
                    firstOutputLayerId = ld.id;
                auto cannNode = backendNode.dynamicCast<CannBackendNode>();
                graphOutputOps.push_back(*(cannNode->getOp()));
                // assume cann graph outputs and dnn net outputs have the same order
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    graphOutputWrappers.push_back(ld.outputBlobsWrappers[i]);
                }
            }

            ld.backendNodes[preferableBackend] = backendNode;
        }
    }
    CV_LOG_INFO(NULL, "DNN/CANN: done converting layers to CANN operators");

    // build graph from collected graph inputs and outputs
    CV_LOG_INFO(NULL, "DNN/CANN: building ge::Graph");
    std::string graphName = cv::format("graph_%d", 0);
    std::shared_ptr<ge::Graph> graph = std::make_shared<ge::Graph>(graphName.c_str());
    (void)graph->SetInputs(graphInputOps);
    (void)graph->SetOutputs(graphOutputOps);
    CV_LOG_INFO(NULL, "DNN/CANN: done building ge::Graph");

    // convert ge::Graph to OM buffer
    CV_LOG_INFO(NULL, "DNN/CANN: converting ge::Graph to OM buffer");
    std::shared_ptr<ge::ModelBufferData> modelBuffer = compileCannGraph(graph);
    CV_LOG_INFO(NULL, "DNN/CANN: OM buffer size = " << modelBuffer->length);
    CV_LOG_INFO(NULL, "DNN/CANN: done building ge::Graph to OM buffer");

    // keep net in the first output node and mark the node runnable
    auto& ld = layers[firstOutputLayerId];
    auto cannNode = ld.backendNodes[preferableBackend].dynamicCast<CannBackendNode>();
    std::shared_ptr<CannNet> net = std::shared_ptr<CannNet>(new CannNet());
    net->loadModelBuffer(modelBuffer);
    net->bindInputWrappers(graphInputWrappers);
    net->bindOutputWrappers(graphOutputWrappers);
    cannNode->net = net;
    ld.skip = false;

    netWasConverted = true;
}
||||
|
||||
// Run one layer. With the CANN backend only the node holding the compiled
// CannNet executes (all other converted layers have skip=true); the input
// layer and any layer without a backend node delegate to the base impl.
// Changes vs. original: removed the unused local copy of ld.layerInstance and
// a redundant CV_Assert that was always true after the early return above it.
void NetImplCann::forwardLayer(LayerData& ld)
{
    CV_TRACE_FUNCTION();

    if (!ld.skip)
    {
        auto it = ld.backendNodes.find(preferableBackend);
        if (ld.id == 0 || it == ld.backendNodes.end()) // input layer
        {
            return Base::forwardLayer(ld);  // base impl sets ld.flag itself
        }

        const Ptr<BackendNode>& node = it->second;
        CV_Assert(!node.empty());
        auto cannNode = node.dynamicCast<CannBackendNode>();
        CV_Assert(!cannNode.empty());
        CV_Assert(cannNode->net);

        TickMeter tm;
        tm.start();

        cannNode->net->forward();

        tm.stop();
        int64_t t = tm.getTimeTicks();
        layersTimings[ld.id] = (t > 0) ? t : 1;  // never report 0 for a layer that ran
    }
    else
    {
        layersTimings[ld.id] = 0;
    }

    ld.flag = 1;
}
||||
|
||||
// Compile a ge::Graph into an offline-model (OM) buffer.
// The CANN build engine is run in a fork()ed child because it cannot be
// re-initialized in-process; the result comes back through an anonymous
// shared mmap: one size_t header (model size) followed by the model bytes.
// Two attempts are made: if the default 128 MB buffer is too small, the child
// reports the required size (exit code 1) and the parent retries with it.
//
// Fixes vs. original: mmap checked against MAP_FAILED; fork() failure checked;
// out_buffer->data now uses an array deleter (new[] was paired with plain
// delete — undefined behavior); model_size initialized; dead "#if 0" dump
// block removed (it would not have compiled: it called .GetName on a
// shared_ptr).
std::shared_ptr<ge::ModelBufferData> compileCannGraph(std::shared_ptr<ge::Graph> graph)
{
    const size_t hdrsize = 32; // header region; only the first size_t is used
    std::shared_ptr<ge::ModelBufferData> out_buffer = std::make_shared<ge::ModelBufferData>();
    size_t buf_size = (1 << 27), model_size = 0; // default buf_size 128 MB

    for (int iter = 0; iter < 2; ++iter)
    {
        size_t* shared_buf = (size_t*)mmap(NULL, buf_size + hdrsize, PROT_READ|PROT_WRITE,
                                           MAP_SHARED|MAP_ANONYMOUS, -1, 0);
        if (shared_buf == MAP_FAILED)
            CV_Error(Error::StsError, "DNN/CANN: failed to mmap a shared buffer for model compilation");
        uint8_t* model_data = (uint8_t*)(shared_buf + 1);

        pid_t child = fork();
        if (child < 0)
        {
            munmap(shared_buf, buf_size + hdrsize);
            CV_Error(Error::StsError, "DNN/CANN: fork() failed, cannot compile CANN graph");
        }

        int childstate = 0;
        bool ok;
        if (child == 0)
        {
            // ---- child process: run the CANN graph build engine ----
            // initialize engine
            std::map<ge::AscendString, ge::AscendString> options = {
                {ge::AscendString(ge::ir_option::SOC_VERSION), ge::AscendString("Ascend310")},
            };
            ACL_CHECK_GRAPH_RET(ge::aclgrphBuildInitialize(options));

            // build
            std::shared_ptr<ge::ModelBufferData> om_model = std::make_shared<ge::ModelBufferData>();
            std::map<ge::AscendString, ge::AscendString> build_options;
            ACL_CHECK_GRAPH_RET(aclgrphBuildModel(*graph, build_options, *om_model));

            // finalize engine
            ge::aclgrphBuildFinalize();

            // send model from child to parent through the shared mapping
            size_t child_model_size = om_model->length;
            *shared_buf = child_model_size;
            if (child_model_size > buf_size)
            {
                exit(1); // too big — parent retries with the reported size
            }
            else
            {
                memcpy(model_data, om_model->data.get(), child_model_size);
                exit(0);
            }
        }

        // ---- parent process: wait and collect ----
        waitpid(child, &childstate, 0);
        model_size = *shared_buf;
        ok = WIFEXITED(childstate) && WEXITSTATUS(childstate) == 0;
        if (ok)
        {
            CV_LOG_INFO(NULL, "Compile success, model size = " << model_size);
            // array deleter: the buffer is allocated with new[]
            out_buffer->data = std::shared_ptr<uint8_t>(new uint8_t[model_size],
                                                        std::default_delete<uint8_t[]>());
            memcpy(out_buffer->data.get(), model_data, model_size);
            out_buffer->length = model_size;
        }
        munmap(shared_buf, buf_size + hdrsize);
        if (ok) break;
        buf_size = model_size; // retry once with the exact required size
    }
    return out_buffer;
}
||||
|
||||
// Swap a Net's implementation for a CANN-backed one. The previous impl is
// kept inside NetImplCann (as basePtr_), so its layer tables stay reachable.
void switchToCannBackend(Net& net)
{
    CV_TRACE_FUNCTION();
    Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
    CV_Assert(impl_ptr_ref);
    CV_LOG_INFO(NULL, "DNN: switching to CANN backend... (networkID=" << impl_ptr_ref->networkId << ")");
    // makePtr fully constructs the new impl from the old one before assignment
    impl_ptr_ref = makePtr<NetImplCann>(impl_ptr_ref);
}
||||
|
||||
#endif // HAVE_CANN
|
||||
|
||||
CV__DNN_INLINE_NS_END |
||||
}} // namespace cv::dnn
|
@ -0,0 +1,329 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp" |
||||
#include "op_cann.hpp" |
||||
|
||||
#include <mutex> |
||||
#include <map> |
||||
#include <cstring> // memcpy |
||||
|
||||
#include <opencv2/dnn/shape_utils.hpp> |
||||
#include <opencv2/core/utils/logger.hpp> |
||||
|
||||
namespace cv { namespace dnn { |
||||
|
||||
#ifdef HAVE_CANN |
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr; |
||||
std::mutex AclEnvGuard::global_acl_env_mutex_; |
||||
|
||||
AclEnvGuard::AclEnvGuard() |
||||
{ |
||||
CV_LOG_INFO(NULL, "Start to initialize CANN"); |
||||
ACL_CHECK_RET(aclInit(NULL)); |
||||
CV_LOG_INFO(NULL, "[Success] initialized CANN"); |
||||
} |
||||
|
||||
AclEnvGuard::~AclEnvGuard() |
||||
{ |
||||
CV_LOG_INFO(NULL, "Start to finalize CANN"); |
||||
ACL_CHECK_RET(aclFinalize()); |
||||
CV_LOG_INFO(NULL, "[Success] finalized CANN"); |
||||
} |
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() |
||||
{ |
||||
std::shared_ptr<AclEnvGuard> acl_env; |
||||
|
||||
std::lock_guard<std::mutex> lock(global_acl_env_mutex_); |
||||
acl_env = global_acl_env_; |
||||
if (acl_env != nullptr) |
||||
{ |
||||
CV_LOG_INFO(NULL, "CANN has been initialized. Skipping..."); |
||||
} |
||||
else |
||||
{ |
||||
acl_env = std::make_shared<AclEnvGuard>(); |
||||
global_acl_env_ = acl_env; |
||||
} |
||||
return acl_env; |
||||
} |
||||
|
||||
// Build a ge::op::Const operator (NCHW) from a raw host buffer.
// data: pointer to the constant's elements; dtype: CV_32F or CV_32S (others
// raise StsNotImplemented); shape: dims; name: unique graph operator name.
// Change vs. original: the dtype was switched on twice (once for the ge dtype,
// once for the element size) — merged into a single switch.
CannConstOp::CannConstOp(const uint8_t* data, const int dtype, const std::vector<int>& shape, const std::string& name)
{
    std::vector<int64_t> shape_{shape.begin(), shape.end()};

    auto ge_shape = ge::Shape(shape_);
    auto ge_dtype = ge::DT_FLOAT;
    auto size_of_type = sizeof(float);
    // map the OpenCV depth to CANN dtype and element size in one place
    switch (dtype)
    {
        case CV_32F: break;
        case CV_32S: ge_dtype = ge::DT_INT32; size_of_type = sizeof(int); break;
        default: CV_Error(Error::StsNotImplemented, "Unsupported data type");
    }
    desc_ = std::make_shared<ge::TensorDesc>(ge_shape, ge::FORMAT_NCHW, ge_dtype);
    auto ge_tensor = std::make_shared<ge::Tensor>();
    ge_tensor->SetTensorDesc(*desc_);
    ge_tensor->SetData(data, ge_shape.GetShapeSize() * size_of_type);
    op_ = std::make_shared<ge::op::Const>(name);
    op_->set_attr_value(*ge_tensor);
}
||||
|
||||
CannBackendNode::CannBackendNode(const std::shared_ptr<ge::Operator>& op) |
||||
: BackendNode(DNN_BACKEND_CANN), op_(op) { } |
||||
|
||||
std::shared_ptr<ge::Operator> CannBackendNode::getOp() { return op_; } |
||||
|
||||
// Wrap a host Mat for the CANN backend and precompute its tensor description
// (NCHW layout, DT_FLOAT) from the Mat's shape.
CannBackendWrapper::CannBackendWrapper(const Mat& m)
    : BackendWrapper(DNN_BACKEND_CANN, DNN_TARGET_NPU), host((Mat*)&m)
{
    auto host_shape = shape(*host);
    std::vector<int64_t> dims64{host_shape.begin(), host_shape.end()};
    desc_ = std::make_shared<ge::TensorDesc>(ge::Shape(dims64), ge::FORMAT_NCHW, ge::DT_FLOAT);
}
||||
|
||||
// Intentional no-op: CannNet::forward() copies outputs back to the bound host
// Mats itself, so the generic dirty-tracking protocol is unused here.
void CannBackendWrapper::copyToHost()
{
    CV_LOG_DEBUG(NULL, "Not implemented");
}

// Intentional no-op; see copyToHost().
void CannBackendWrapper::setHostDirty()
{
    CV_LOG_DEBUG(NULL, "Not implemented");
}
||||
|
||||
// Tear down in reverse order of setup: datasets -> model -> model_desc ->
// context -> device. If no model was ever loaded (model_desc == nullptr)
// nothing is released — init() side effects (device/context) are then
// intentionally left to process teardown.
CannNet::~CannNet()
{
    CV_LOG_INFO(NULL, "In ~CannNet, inputs = " << inputs << ", outputs = " << outputs);
    if (!model_desc)
    {
        CV_LOG_INFO(NULL, "[Failed] Tried to deconstruct CannNet but model is not loaded");
        return;
    }
    // free datasets: inputs, outputs
    if (inputs)
    {
        CV_LOG_INFO(NULL, "In ~CannNet: destroy inputs");
        destroyDataset(&inputs);
    }
    if (outputs)
    {
        CV_LOG_INFO(NULL, "In ~CannNet: destroy outputs");
        destroyDataset(&outputs);
    }
    // unload model
    ACL_CHECK_RET(aclmdlUnload(model_id));
    // destroy model_desc
    ACL_CHECK_RET(aclmdlDestroyDesc(model_desc));
    model_desc = nullptr;
    CV_LOG_INFO(NULL, "[Success] Unloaded model (id=" << model_id << ")");

    // destroy context
    if (context != nullptr)
    {
        ACL_CHECK_RET(aclrtDestroyContext(context));
        context = nullptr;
    }
    // reset device
    // NOTE(review): after the block above, context is always nullptr here, so
    // the device is reset even if no context was ever created — confirm this
    // is the intended condition (it reads like "only reset once the context
    // is gone").
    if (context == nullptr)
    {
        ACL_CHECK_RET(aclrtResetDevice(device_id));
    }
}
||||
|
||||
bool CannNet::empty() const |
||||
{ |
||||
return (model_desc == nullptr); |
||||
} |
||||
|
||||
void CannNet::loadModelBuffer(std::shared_ptr<ge::ModelBufferData> modelBuffer) |
||||
{ |
||||
model.clear(); |
||||
model.resize(modelBuffer->length); |
||||
std::memcpy(reinterpret_cast<void*>(model.data()), |
||||
reinterpret_cast<void*>(modelBuffer->data.get()), |
||||
modelBuffer->length); |
||||
loadToDevice(); |
||||
} |
||||
|
||||
// Attach one host Mat per model input slot, validating rank and per-axis size
// against the loaded model's expectations.
void CannNet::bindInputWrappers(const std::vector<Ptr<BackendWrapper>>& inputWrappers)
{
    CV_Assert(inputWrappers.size() == getInputNum());
    for (size_t idx = 0; idx < inputWrappers.size(); ++idx)
    {
        auto cann_wrapper = inputWrappers[idx].dynamicCast<CannBackendWrapper>();

        // verify size
        aclmdlIODims io_dims;
        ACL_CHECK_RET(aclmdlGetInputDims(model_desc, idx, &io_dims));
        CV_CheckEQ((int)io_dims.dimCount, cann_wrapper->host->dims, "Dimension of input does not match with model's requirement");
        for (size_t axis = 0; axis < io_dims.dimCount; ++axis)
            CV_CheckEQ((int)io_dims.dims[axis], cann_wrapper->host->size[axis], "Size of input does not match with model's requirement");

        input_wrappers.push_back(cann_wrapper);
    }
}
||||
|
||||
// Attach one host Mat per model output slot, validating rank and per-axis size.
// Fixes vs. original: the check messages said "input" (copy-paste from
// bindInputWrappers) — now say "output"; loop index changed from int to size_t
// to match the container and the ACL index type.
void CannNet::bindOutputWrappers(const std::vector<Ptr<BackendWrapper>>& outputWrappers)
{
    CV_Assert(outputWrappers.size() == getOutputNum());
    for (size_t i = 0; i < outputWrappers.size(); ++i)
    {
        auto wrapper = outputWrappers[i].dynamicCast<CannBackendWrapper>();

        // verify size
        aclmdlIODims model_dims;
        ACL_CHECK_RET(aclmdlGetOutputDims(model_desc, i, &model_dims));
        CV_CheckEQ((int)model_dims.dimCount, wrapper->host->dims, "Dimension of output does not match with model's requirement");
        for (size_t j = 0; j < model_dims.dimCount; ++j)
            CV_CheckEQ((int)model_dims.dims[j], wrapper->host->size[j], "Size of output does not match with model's requirement");

        output_wrappers.push_back(wrapper);
    }
}
||||
|
||||
void CannNet::forward() |
||||
{ |
||||
// send inputs from host to device
|
||||
CV_LOG_DEBUG(NULL, "DNN/CANN: start sending inputs to device"); |
||||
for (size_t i = 0; i < input_wrappers.size(); ++i) |
||||
{ |
||||
const void* p_host = (const void*)input_wrappers[i]->host->data; |
||||
|
||||
auto db = aclmdlGetDatasetBuffer(inputs, i); |
||||
auto p_device = aclGetDataBufferAddr(db); |
||||
auto db_size = aclGetDataBufferSizeV2(db); |
||||
|
||||
ACL_CHECK_RET(aclrtMemcpy(p_device, db_size, p_host, db_size, ACL_MEMCPY_HOST_TO_DEVICE)); |
||||
} |
||||
CV_LOG_DEBUG(NULL, "DNN/CANN: finished sending inputs to device"); |
||||
|
||||
// forward
|
||||
CV_LOG_DEBUG(NULL, "DNN/CANN: start network forward"); |
||||
ACL_CHECK_RET(aclrtSetCurrentContext(context)); |
||||
ACL_CHECK_RET(aclmdlExecute(model_id, inputs, outputs)); |
||||
CV_LOG_DEBUG(NULL, "DNN/CANN: finished network forward"); |
||||
|
||||
// fetch ouputs from device to host
|
||||
CV_LOG_DEBUG(NULL, "DNN/CANN: start fetching outputs to host"); |
||||
for (size_t i = 0; i < output_wrappers.size(); ++i) |
||||
{ |
||||
void* p_host = (void*)output_wrappers[i]->host->data; |
||||
|
||||
auto db = aclmdlGetDatasetBuffer(outputs, i); |
||||
auto p_device = aclGetDataBufferAddr(db); |
||||
auto db_size = aclGetDataBufferSizeV2(db); |
||||
|
||||
ACL_CHECK_RET(aclrtMemcpy(p_host, db_size, p_device, db_size, ACL_MEMCPY_DEVICE_TO_HOST)); |
||||
} |
||||
CV_LOG_DEBUG(NULL, "DNN/CANN: finish fetching outputs to host"); |
||||
} |
||||
|
||||
// Number of input slots reported by the loaded model description.
// Only meaningful after loadToDevice(); model_desc must be non-null.
size_t CannNet::getInputNum() const
{
    return aclmdlGetNumInputs(model_desc);
}

// Number of output slots; same precondition as getInputNum().
size_t CannNet::getOutputNum() const
{
    return aclmdlGetNumOutputs(model_desc);
}
||||
|
||||
// Bind this net to its NPU. Order matters: the device must be set before a
// context can be created on it.
void CannNet::init()
{
    ACL_CHECK_RET(aclrtSetDevice(device_id));
    ACL_CHECK_RET(aclrtCreateContext(&context, device_id));
}
||||
|
||||
// Upload the serialized OM model (`model`, filled by loadModelBuffer) to the
// NPU and build its description plus the device-side I/O datasets.
// Requires ACL to be initialized (the ctor acquires AclEnvGuard).
// Idempotent: returns early when a model description already exists.
void CannNet::loadToDevice()
{
    if (model_desc != nullptr)
    {
        CV_LOG_INFO(NULL, "Model has been loaded to device. Skipping ...");
        return;
    }

    CV_LOG_INFO(NULL, "Load model to NPU memory");
    ACL_CHECK_RET(aclmdlLoadFromMem(reinterpret_cast<const void*>(model.data()), model.size(), &model_id));

    CV_LOG_INFO(NULL, "Create model description");
    model_desc = aclmdlCreateDesc();
    ACL_CHECK_RET(aclmdlGetDesc(model_desc, model_id));

    createInputDataset();
    createOutputDataset();
}
||||
|
||||
void CannNet::createInputDataset() |
||||
{ |
||||
inputs = aclmdlCreateDataset(); |
||||
size_t n_inputs = aclmdlGetNumInputs(model_desc); |
||||
size_t length; |
||||
for (size_t i = 0; i < n_inputs; i++) |
||||
{ |
||||
length = aclmdlGetInputSizeByIndex(model_desc, i); |
||||
CV_LOG_INFO(NULL, "length = " << length); |
||||
void* p_device = nullptr; |
||||
ACL_CHECK_RET(aclrtMalloc(&p_device, length, ACL_MEM_MALLOC_NORMAL_ONLY)); |
||||
auto p_data_buffer = aclCreateDataBuffer(p_device, length); |
||||
ACL_CHECK_RET(aclmdlAddDatasetBuffer(inputs, p_data_buffer)); |
||||
} |
||||
} |
||||
|
||||
void CannNet::createOutputDataset() |
||||
{ |
||||
outputs = aclmdlCreateDataset(); |
||||
size_t n_outputs = aclmdlGetNumOutputs(model_desc); |
||||
size_t length; |
||||
for (size_t i = 0; i < n_outputs; i++) |
||||
{ |
||||
length = aclmdlGetOutputSizeByIndex(model_desc, i); |
||||
void* p_device = nullptr; |
||||
ACL_CHECK_RET(aclrtMalloc(&p_device, length, ACL_MEM_MALLOC_NORMAL_ONLY)); |
||||
auto p_data_buffer = aclCreateDataBuffer(p_device, length); |
||||
ACL_CHECK_RET(aclmdlAddDatasetBuffer(outputs, p_data_buffer)); |
||||
} |
||||
} |
||||
|
||||
// Free every device buffer of a dataset, destroy the dataset and null the
// caller's pointer. Fixes vs. original: also guards against *dataset being
// null (previously only the outer pointer was checked, so a dataset that was
// never created — or already destroyed — would be dereferenced); loop index
// changed to size_t to match aclmdlGetDatasetNumBuffers.
void CannNet::destroyDataset(aclmdlDataset** dataset)
{
    if (!dataset || !*dataset)
    {
        CV_LOG_INFO(NULL, "CANN dataset is not initialized");
        return;
    }
    auto buffer_count = aclmdlGetDatasetNumBuffers(*dataset);
    CV_LOG_INFO(NULL, "buffer_count = " << buffer_count);
    for (size_t i = 0; i < buffer_count; i++)
    {
        auto data_buffer = aclmdlGetDatasetBuffer(*dataset, i);
        auto p_device = aclGetDataBufferAddr(data_buffer);
        if (p_device)
        {
            ACL_CHECK_RET(aclrtFree(p_device)); // 107000?
        }
        else
        {
            CV_LOG_INFO(NULL, "Data buffer (i=" << i << ") from ACL dataset is invalid");
        }
        ACL_CHECK_RET(aclDestroyDataBuffer(data_buffer));
    }
    ACL_CHECK_RET(aclmdlDestroyDataset(*dataset));
    *dataset = nullptr;
    CV_LOG_INFO(NULL, "[Success] Destroyed dataset");
}
||||
|
||||
#endif // HAVE_CANN
|
||||
|
||||
}} // namespace cv::dnn
|
@ -0,0 +1,164 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifndef OPENCV_DNN_OP_CANN_HPP |
||||
#define OPENCV_DNN_OP_CANN_HPP |
||||
|
||||
#ifdef HAVE_CANN |
||||
#include "acl/acl.h" // acl* functions |
||||
#include "graph/graph.h" // ge::Graph; ge::Operator from operator.h |
||||
#include "graph/ge_error_codes.h" // GRAPH_SUCCESS, ... |
||||
|
||||
#include "op_proto/built-in/inc/all_ops.h" // ge::Conv2D, ... |
||||
#include "graph/tensor.h" // ge::Shape, ge::Tensor, ge::TensorDesc |
||||
#include "graph/types.h" // DT_FLOAT, ... ; FORMAT_NCHW, ... |
||||
|
||||
#include "ge/ge_api_types.h" // ge::ir_option::SOC_VERSION |
||||
#include "ge/ge_ir_build.h" // build graph |
||||
|
||||
// for fork()
|
||||
#include <stdlib.h> |
||||
#include <sys/mman.h> |
||||
#include <sys/types.h> |
||||
#include <sys/wait.h> |
||||
#include <unistd.h> |
||||
|
||||
#endif // HAVE_CANN
|
||||
|
||||
#include <vector> |
||||
|
||||
#ifdef HAVE_CANN |
||||
// Check the result of an ACL runtime call, aborting via CV_Error on failure.
// Fixes vs. original: the argument `f` was expanded TWICE — once in the
// comparison and again inside the error log — so a failing ACL call was
// re-executed while logging; now it is evaluated exactly once into a local.
// Wrapping in do { } while(0) also makes the macro a single statement that is
// safe inside unbraced if/else.
#define ACL_CHECK_RET(f) \
do { \
    auto acl_check_ret_value_ = (f); \
    if (acl_check_ret_value_ != ACL_SUCCESS) \
    { \
        CV_LOG_ERROR(NULL, "CANN check failed, ret = " << acl_check_ret_value_); \
        CV_Error(Error::StsError, "CANN check failed"); \
    } \
} while(0)

// Same contract for graph-engine (ge::) calls returning ge::GRAPH_SUCCESS.
#define ACL_CHECK_GRAPH_RET(f) \
do { \
    auto ge_check_ret_value_ = (f); \
    if (ge_check_ret_value_ != ge::GRAPH_SUCCESS) \
    { \
        CV_LOG_ERROR(NULL, "CANN graph check failed, ret = " << ge_check_ret_value_); \
        CV_Error(Error::StsError, "CANN graph check failed"); \
    } \
} while(0)
||||
|
||||
#endif |
||||
|
||||
namespace cv { namespace dnn { |
||||
|
||||
#ifdef HAVE_CANN |
||||
|
||||
CV__DNN_INLINE_NS_BEGIN |
||||
|
||||
void switchToCannBackend(Net& net); |
||||
|
||||
CV__DNN_INLINE_NS_END |
||||
|
||||
class CannNet; |
||||
|
||||
// Process-wide RAII guard for the ACL runtime: aclInit in the ctor,
// aclFinalize in the dtor. Obtain via GetAclEnv(); the runtime stays alive
// as long as any holder of the returned shared_ptr exists.
class AclEnvGuard {
public:
    explicit AclEnvGuard();
    ~AclEnvGuard();
    static std::shared_ptr<AclEnvGuard> GetAclEnv();

private:
    static std::shared_ptr<AclEnvGuard> global_acl_env_;  // singleton instance
    static std::mutex global_acl_env_mutex_;              // guards its creation
};
||||
|
||||
// Wraps a ge::op::Const operator built from a raw host buffer (e.g. weights
// or bias constants fed into the CANN graph) plus its tensor description.
class CannConstOp
{
public:
    // data: host pointer to the elements; dtype: CV_32F or CV_32S (others
    // raise); shape: dims (NCHW); name: unique operator name in the graph.
    CannConstOp(const uint8_t* data, const int dtype, const std::vector<int>& shape, const std::string& name);
    std::shared_ptr<ge::op::Const> getOp() { return op_; }
    std::shared_ptr<ge::TensorDesc> getTensorDesc() { return desc_; }
private:
    std::shared_ptr<ge::op::Const> op_;
    std::shared_ptr<ge::TensorDesc> desc_;
};
||||
|
||||
// BackendNode carrying one CANN (ge) operator. The node chosen as the first
// output of the converted graph additionally owns the compiled CannNet.
class CannBackendNode : public BackendNode
{
public:
    CannBackendNode(const std::shared_ptr<ge::Operator>& op);
    std::shared_ptr<ge::Operator> getOp();
    std::shared_ptr<CannNet> net;  // set only on the node that runs the model
private:
    std::shared_ptr<ge::Operator> op_;
};
||||
|
||||
// Wraps a host cv::Mat for the CANN backend. Host<->device transfers are done
// explicitly by CannNet::forward(), so copyToHost()/setHostDirty() are no-ops.
class CannBackendWrapper : public BackendWrapper
{
public:
    CannBackendWrapper(const Mat& m);
    ~CannBackendWrapper() { }

    std::shared_ptr<ge::TensorDesc> getTensorDesc() { return desc_; }

    virtual void copyToHost() CV_OVERRIDE;

    virtual void setHostDirty() CV_OVERRIDE;

    Mat* host;  // borrowed pointer to the wrapped Mat (not owned)
    std::shared_ptr<ge::TensorDesc> desc_;  // NCHW / DT_FLOAT description of `host`
};
||||
|
||||
// Owns a compiled CANN offline model together with its device context and
// device-side I/O buffers. Lifecycle: construct (binds device, creates
// context) -> loadModelBuffer() -> bindInputWrappers()/bindOutputWrappers()
// -> forward() repeatedly; the destructor releases everything.
class CannNet
{
public:
    // deviceId selects the NPU; the ctor also acquires the process-wide
    // ACL environment so the runtime stays initialized while this net lives.
    explicit CannNet(int deviceId = 0)
        : device_id(deviceId)
    {
        init();
        acl_env = AclEnvGuard::GetAclEnv();
    }
    ~CannNet(); // release private members

    // True until a model has been loaded to the device.
    bool empty() const;

    // Copy the OM buffer into this net and upload it to the NPU.
    void loadModelBuffer(std::shared_ptr<ge::ModelBufferData> modelBuffer);

    // Attach host Mats to the model's I/O slots (sizes are validated).
    void bindInputWrappers(const std::vector<Ptr<BackendWrapper>>& inputWrappers);
    void bindOutputWrappers(const std::vector<Ptr<BackendWrapper>>& outputWrappers);

    // Host->device copy, aclmdlExecute, device->host copy.
    void forward();

    size_t getInputNum() const;
    size_t getOutputNum() const;

private:
    void init();

    void loadToDevice(); // call aclInit before this API is called
    void createInputDataset();
    void createOutputDataset();

    int getOutputIndexByName(const std::string& name);

    void destroyDataset(aclmdlDataset** dataset);

    std::shared_ptr<AclEnvGuard> acl_env;  // keeps the ACL runtime alive

    std::vector<Ptr<CannBackendWrapper>> input_wrappers;   // bound input Mats
    std::vector<Ptr<CannBackendWrapper>> output_wrappers;  // bound output Mats

    uint32_t model_id{0};
    aclmdlDesc* model_desc{nullptr};   // null until loadToDevice() succeeds
    std::vector<uint8_t> model;        // serialized OM model bytes
    aclmdlDataset* inputs{nullptr};    // device-side input buffers
    aclmdlDataset* outputs{nullptr};   // device-side output buffers

    int device_id{0};
    aclrtContext context{nullptr};
};
||||
|
||||
#endif // HAVE_CANN
|
||||
|
||||
}} // namespace cv::dnn
|
||||
|
||||
#endif // OPENCV_DNN_OP_CANN_HPP
|
Loading…
Reference in new issue