diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45edafee38..6a620c94af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -468,6 +468,9 @@ OCV_OPTION(WITH_TIMVX "Include Tim-VX support" OFF
 OCV_OPTION(WITH_OBSENSOR "Include obsensor support (Orbbec RGB-D modules: Astra+/Femto)" ON
   VISIBLE_IF (WIN32 AND NOT ARM AND NOT WINRT) OR ( UNIX AND NOT APPLE AND NOT ANDROID)
   VERIFY HAVE_OBSENSOR)
+OCV_OPTION(WITH_CANN "Include CANN support" OFF
+  VISIBLE_IF TRUE
+  VERIFY HAVE_CANN)

 # OpenCV build components
 # ===================================================
@@ -753,6 +756,9 @@ endif()
 if(WITH_TIMVX)
   include(cmake/OpenCVFindTIMVX.cmake)
 endif()
+if(WITH_CANN)
+  include(cmake/OpenCVFindCANN.cmake)
+endif()

 # ----------------------------------------------------------------------------
 # Detect other 3rd-party libraries/tools
@@ -1736,6 +1742,15 @@ if(WITH_ONNX OR HAVE_ONNX)
   endif()
 endif()

+if(WITH_CANN)
+  status("")
+  status("  CANN:" HAVE_CANN THEN "YES" ELSE "NO")
+  if(HAVE_CANN)
+    status("    Include path:" CANN_INCLUDE_DIRS THEN "${CANN_INCLUDE_DIRS}" ELSE "NO")
+    status("    Link libraries:" CANN_LIBRARIES THEN "${CANN_LIBRARIES}" ELSE "NO")
+  endif()
+endif()
+
 # ========================== python ==========================
 if(BUILD_opencv_python2)
   status("")
diff --git a/cmake/OpenCVFindCANN.cmake b/cmake/OpenCVFindCANN.cmake
new file mode 100644
index 0000000000..b0b8e35c6b
--- /dev/null
+++ b/cmake/OpenCVFindCANN.cmake
@@ -0,0 +1,109 @@
+ocv_check_environment_variables(CANN_INSTALL_DIR)
+
+if("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
+    set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
+    message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
+endif()
+
+if(CANN_INSTALL_DIR)
+    # Supported platforms: x86-64, arm64
+    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
+    else()
+        set(HAVE_CANN OFF)
+        message(STATUS "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off HAVE_CANN")
+        return()
+    endif()
+
+    # Supported OS: Linux (because we need fork() to build models in a child process)
+    #               done via checks in cann.cpp
+    # FIXME: remove the check if a better model building solution is found
+
+    # include
+    set(incs_cann "${CANN_INSTALL_DIR}/include")
+    list(APPEND incs_cann "${CANN_INSTALL_DIR}/opp")
+
+    # libs
+    # * libascendcl.so
+    set(lib_ascendcl "${CANN_INSTALL_DIR}/acllib/lib64")
+    find_library(found_lib_ascendcl NAMES ascendcl PATHS ${lib_ascendcl} NO_DEFAULT_PATH)
+    if(found_lib_ascendcl)
+        set(lib_ascendcl ${found_lib_ascendcl})
+        message(STATUS "CANN: libascendcl.so is found at ${lib_ascendcl}")
+    else()
+        message(STATUS "CANN: Missing libascendcl.so. Turning off HAVE_CANN")
+        set(HAVE_CANN OFF)
+        return()
+    endif()
+    # * libgraph.so
+    set(lib_graph "${CANN_INSTALL_DIR}/compiler/lib64")
+    find_library(found_lib_graph NAMES graph PATHS ${lib_graph} NO_DEFAULT_PATH)
+    if(found_lib_graph)
+        set(lib_graph ${found_lib_graph})
+        message(STATUS "CANN: libgraph.so is found at ${lib_graph}")
+    else()
+        message(STATUS "CANN: Missing libgraph.so. Turning off HAVE_CANN")
+        set(HAVE_CANN OFF)
+        return()
+    endif()
+    # * libge_compiler.so
+    set(lib_ge_compiler "${CANN_INSTALL_DIR}/compiler/lib64")
+    find_library(found_lib_ge_compiler NAMES ge_compiler PATHS ${lib_ge_compiler} NO_DEFAULT_PATH)
+    if(found_lib_ge_compiler)
+        set(lib_ge_compiler ${found_lib_ge_compiler})
+        message(STATUS "CANN: libge_compiler.so is found at ${lib_ge_compiler}")
+    else()
+        message(STATUS "CANN: Missing libge_compiler.so. Turning off HAVE_CANN")
+        set(HAVE_CANN OFF)
+        return()
+    endif()
+    # * libopsproto.so
+    set(lib_opsproto "${CANN_INSTALL_DIR}/opp/op_proto/built-in")
+    find_library(found_lib_opsproto NAMES opsproto PATHS ${lib_opsproto} NO_DEFAULT_PATH)
+    if(found_lib_opsproto)
+        set(lib_opsproto ${found_lib_opsproto})
+        message(STATUS "CANN: libopsproto.so is found at ${lib_opsproto}")
+    else()
+        message(STATUS "CANN: Missing libopsproto.so. Turning off HAVE_CANN")
+        set(HAVE_CANN OFF)
+        return()
+    endif()
+
+
+    set(libs_cann "")
+    list(APPEND libs_cann ${lib_ascendcl})
+    list(APPEND libs_cann ${lib_opsproto})
+    list(APPEND libs_cann ${lib_graph})
+    list(APPEND libs_cann ${lib_ge_compiler})
+
+    try_compile(VALID_ASCENDCL
+        "${OpenCV_BINARY_DIR}"
+        "${OpenCV_SOURCE_DIR}/cmake/checks/cann.cpp"
+        CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${incs_cann}"
+                    "-DLINK_LIBRARIES:STRING=${libs_cann}"
+        OUTPUT_VARIABLE ASCEND_TRY_OUT)
+
+    if(NOT ${VALID_ASCENDCL})
+        message(WARNING "Cannot use CANN")
+        set(HAVE_CANN OFF)
+        return()
+    endif()
+
+    set(HAVE_CANN ON)
+endif()
+
+if(HAVE_CANN)
+    set(CANN_INCLUDE_DIRS ${incs_cann})
+    set(CANN_LIBRARIES ${libs_cann})
+    ocv_add_external_target(cann "${CANN_INCLUDE_DIRS}" "${CANN_LIBRARIES}" "HAVE_CANN")
+    ocv_warnings_disable(CMAKE_C_FLAGS -Wignored-qualifiers)
+    ocv_warnings_disable(CMAKE_CXX_FLAGS -Wignored-qualifiers)
+endif()
+
+MARK_AS_ADVANCED(
+    incs_cann
+    libs_cann
+    lib_ascendcl
+    lib_opsproto
+    lib_graph
+    lib_ge_compiler
+)
diff --git a/cmake/checks/cann.cpp b/cmake/checks/cann.cpp
new file mode 100644
index 0000000000..08e463c4e5
--- /dev/null
+++ b/cmake/checks/cann.cpp
@@ -0,0 +1,20 @@
+#include <acl/acl.h>
+#include <unistd.h> // fork()
+#include <iostream>
+
+int main(int /*argc*/, char** /*argv*/)
+{
+    int ret = aclInit(NULL);
+    if (ret != 0)
+    {
+        std::cerr << "Failed to initialize Ascend, ret = " << ret;
+    }
+
+    ret = aclFinalize();
+    if (ret != 0)
+    {
+        std::cerr << "Failed to de-initialize Ascend, ret = " << ret;
+    }
+
+    return 0;
+}
diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt
index 6333646a5a..1ec21c085d 100644
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@@ -31,6 +31,10 @@ if(HAVE_TIMVX)
     ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_TIMVX=1")
 endif()

+if(HAVE_CANN)
+    ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_CANN=1")
+endif()
+
 ocv_option(OPENCV_DNN_CUDA "Build with CUDA support"
            HAVE_CUDA
            AND HAVE_CUBLAS
@@ -162,6 +166,11 @@ if(HAVE_TIMVX)
     list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive)
 endif()

+if(HAVE_CANN)
+    list(APPEND include_dirs ${CANN_INCLUDE_DIRS})
+    list(APPEND libs -Wl,--whole-archive ${CANN_LIBRARIES} -Wl,--no-whole-archive)
+endif()
+
 set(webnn_srcs "")
 if(NOT EMSCRIPTEN)
 if(HAVE_WEBNN)
@@ -264,3 +273,10 @@ if(TARGET ocv.3rdparty.openvino AND OPENCV_TEST_DNN_OPENVINO)
     ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.openvino)
   endif()
 endif()
+
+ocv_option(OPENCV_TEST_DNN_CANN "Build test with CANN" (TARGET ocv.3rdparty.cann))
+if(TARGET ocv.3rdparty.cann AND OPENCV_TEST_DNN_CANN)
+  if(TARGET opencv_test_dnn)
+    ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.cann)
+  endif()
+endif()
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index affcf780f4..ffc9473c6e 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -81,6 +81,7 @@ CV__DNN_INLINE_NS_BEGIN
         DNN_BACKEND_CUDA,
         DNN_BACKEND_WEBNN,
         DNN_BACKEND_TIMVX,
+        DNN_BACKEND_CANN,
 #if defined(__OPENCV_BUILD) || defined(BUILD_PLUGIN)
 #if !defined(OPENCV_BINDING_PARSER)
         DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000,  // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType()
@@ -343,6 +344,15 @@ CV__DNN_INLINE_NS_BEGIN
                                                  const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
                                                  bool isLast);

+        /**
+         * @brief Returns a CANN backend node
+         *
+         * @param inputsWrapper layer inputs
+         * @param index layer id for op name
+         * @param nodes inputs of this node
+         */
+        virtual Ptr<BackendNode> initCann(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes);
+
         /**
          * @brief Automatic Halide scheduling based on layer hyper-parameters.
          * @param[in] node Backend node with Halide functions.
diff --git a/modules/dnn/src/layer.cpp b/modules/dnn/src/layer.cpp
index 0ed3488da6..5305a5221d 100644
--- a/modules/dnn/src/layer.cpp
+++ b/modules/dnn/src/layer.cpp
@@ -84,6 +84,12 @@ Ptr<BackendNode> Layer::initTimVX(void* timVxInfo,
     return Ptr<BackendNode>();
 }

+Ptr<BackendNode> Layer::initCann(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+{
+    CV_Error(Error::StsNotImplemented, "CANN pipeline of " + type + " layers is not defined.");
+    return Ptr<BackendNode>();
+}
+
 Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
 {
     return Ptr<BackendNode>();
diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp
index 377e05f5cc..e112ba0746 100644
--- a/modules/dnn/src/layers/batch_norm_layer.cpp
+++ b/modules/dnn/src/layers/batch_norm_layer.cpp
@@ -16,6 +16,7 @@ Implementation of Batch Normalization layer.
#include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" #include "../op_webnn.hpp" +#include "../op_cann.hpp" #include @@ -40,6 +41,7 @@ public: Mat weights_, bias_; UMat umat_weight, umat_bias; mutable int dims; + float momentum; BatchNormLayerImpl(const LayerParams& params) @@ -55,6 +57,9 @@ public: hasWeights = hasBias = true; epsilon = params.get("eps", 1E-5); + // std::cout << params.get("momentum", 0.9) << std::endl; + momentum = params.get("momentum", 0.9); + size_t n = blobs[0].total(); CV_Assert(blobs[1].total() == n && blobs[0].isContinuous() && blobs[1].isContinuous() && @@ -177,7 +182,8 @@ public: return (backendId == DNN_BACKEND_OPENCV) || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide()) || - backendId == DNN_BACKEND_WEBNN; + backendId == DNN_BACKEND_WEBNN || + backendId == DNN_BACKEND_CANN; } #ifdef HAVE_OPENCL @@ -385,6 +391,66 @@ public: } #endif // HAVE_HALIDE +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + CV_Assert(nodes.size() == 1); + CV_Assert(blobs.size() == 4); // must have scale, offset, mean and variance + + auto x = inputsWrapper[0].dynamicCast(); + auto channel = x->host->size[1]; + + // create operator + std::string op_name = cv::format("bn_%d", index); + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_epsilon(epsilon); + op->set_attr_data_format("NCHW"); + op->set_attr_is_training(false); + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + // set inputs : scale (blobs[2]) + std::vector shape_{channel}; + auto op_const_scale = std::make_shared(blobs[2].data, blobs[2].type(), shape_, cv::format("%s_scale", op_name.c_str())); + op->set_input_scale(*(op_const_scale->getOp())); + op->update_input_desc_scale(*(op_const_scale->getTensorDesc())); + // set inputs : offset (blobs[3]) + auto op_const_offset = std::make_shared(blobs[3].data, blobs[3].type(), shape_, cv::format("%s_offset", op_name.c_str())); + op->set_input_offset(*(op_const_offset->getOp())); + op->update_input_desc_offset(*(op_const_offset->getTensorDesc())); + // set inputs : mean (blobs[0]) + auto op_const_mean = std::make_shared(blobs[0].data, blobs[0].type(), shape_, cv::format("%s_mean", op_name.c_str())); + op->set_input_mean(*(op_const_mean->getOp())); + op->update_input_desc_mean(*(op_const_mean->getTensorDesc())); + // set inputs : variance (blobs[1]) + auto op_const_var = std::make_shared(blobs[1].data, blobs[1].type(), shape_, cv::format("%s_var", op_name.c_str())); + op->set_input_variance(*(op_const_var->getOp())); + op->update_input_desc_variance(*(op_const_var->getTensorDesc())); + + // set outputs + auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_y_desc); + auto output_bm_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_batch_mean(*output_bm_desc); + auto output_bv_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_batch_variance(*output_bv_desc); + auto output_rs1_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_reserve_space_1(*output_rs1_desc); + auto output_rs2_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_reserve_space_2(*output_rs2_desc); 
+ auto output_rs3_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_reserve_space_3(*output_rs3_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN + #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 0d6ab19e4d..972aa7c9c8 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -43,6 +43,7 @@ #include "../op_cuda.hpp" #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" +#include "../op_cann.hpp" #ifdef HAVE_CUDA #include "../cuda4dnn/primitives/reshape.hpp" @@ -68,7 +69,8 @@ public: return true; #endif return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_CUDA; + backendId == DNN_BACKEND_CUDA || + backendId == DNN_BACKEND_CANN; } bool getMemoryShapes(const std::vector &inputs, @@ -118,6 +120,28 @@ public: inputs[i].copyTo(outputs[i]); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + auto x_desc = x->getTensorDesc(); + auto op_x = nodes[0].dynamicCast()->getOp(); + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + // create operator + std::string op_name = cv::format("identity_%d", index); + auto op = std::make_shared(op_name); + + // set inputs + op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + + // set output + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 5ba0cd199b..1b520cf87a 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -49,6 +49,7 @@ #include "../op_vkcom.hpp" #include "../op_webnn.hpp" #include "../op_timvx.hpp" +#include "../op_cann.hpp" #ifdef HAVE_OPENCL #include "opencl_kernels_dnn.hpp" @@ -140,7 +141,8 @@ public: backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) || // By channels (backendId == DNN_BACKEND_WEBNN && !padding) || - (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding); + (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding) || + (backendId == DNN_BACKEND_CANN && !padding); } template @@ -364,6 +366,38 @@ public: return Ptr(); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + CV_Assert(inputsWrapper.size() == nodes.size()); + + // create operator + std::string op_name = cv::format("concat_%d", index); + auto op = std::make_shared(op_name); + + // set attributes + int N = inputsWrapper.size(); + op->set_attr_concat_dim(axis); + op->set_attr_N(N); + + // set inputs : x (dynamic) + op->create_dynamic_input_x(N); + for (int i = 0; i < N; i++) + { + auto x_i = inputsWrapper[i].dynamicCast(); + auto x_i_desc = x_i->getTensorDesc(); + auto op_x_i = nodes[i].dynamicCast()->getOp(); + op->set_dynamic_input_x(i, *op_x_i, "y"); + op->update_dynamic_input_desc_x(i, *x_i_desc); + } + + // set outputs + auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_y_desc); + + return Ptr(new 
CannBackendNode(op));
+    }
+#endif

 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/const_layer.cpp b/modules/dnn/src/layers/const_layer.cpp
index 4392763be7..2141ad987a 100644
--- a/modules/dnn/src/layers/const_layer.cpp
+++ b/modules/dnn/src/layers/const_layer.cpp
@@ -11,6 +11,9 @@
 #include "layers_common.hpp"
 #include "../ie_ngraph.hpp"
 #include "../op_webnn.hpp"
+#include "../op_cann.hpp"
+
+#include <opencv2/dnn/shape_utils.hpp>

 #ifdef HAVE_OPENCL
 #include "opencl_kernels_dnn.hpp"
@@ -40,7 +43,8 @@ public:
 #endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_WEBNN ||
-               backendId == DNN_BACKEND_CUDA;
+               backendId == DNN_BACKEND_CUDA ||
+               backendId == DNN_BACKEND_CANN;
     }

     virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -79,6 +83,40 @@ public:
         blobs[0].copyTo(outputs[0]);
     }

+#ifdef HAVE_CANN
+    virtual Ptr<BackendNode> initCann(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto mat_shape = shape(blobs[0]);
+        std::vector<int64_t> mat_shape_{mat_shape.begin(), mat_shape.end()};
+
+        auto ge_shape = ge::Shape(mat_shape_);
+        auto ge_dtype = ge::DT_FLOAT;
+        switch (blobs[0].type())
+        {
+            case CV_32F: break;
+            case CV_32S: ge_dtype = ge::DT_INT32; break;
+            default: CV_Error(Error::StsNotImplemented, "Unsupported data type");
+        }
+        auto size_of_type = sizeof(float);
+        switch (blobs[0].type())
+        {
+            case CV_32F: break;
+            case CV_32S: size_of_type = sizeof(int); break;
+            default: CV_Error(Error::StsNotImplemented, "Unsupported data type");
+        }
+
+        auto desc = std::make_shared<ge::TensorDesc>(ge_shape, ge::FORMAT_NCHW, ge_dtype);
+        auto ge_tensor = std::make_shared<ge::Tensor>();
+        ge_tensor->SetTensorDesc(*desc);
+        ge_tensor->SetData(blobs[0].data, ge_shape.GetShapeSize() * size_of_type);
+
+        std::string op_name = cv::format("const_%d", index);
+        auto op = std::make_shared<ge::op::Const>(op_name);
+        op->set_attr_value(*ge_tensor);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif // HAVE_CANN

 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index cc39593232..8648a25297 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -48,6 +48,7 @@
 #include "../ie_ngraph.hpp"
 #include "../op_vkcom.hpp"
 #include "../op_webnn.hpp"
+#include "../op_cann.hpp"

 #include
 #include
@@ -369,6 +370,17 @@ public:
             return true;
         }
 #endif
+#ifdef HAVE_CANN
+        if (backendId == DNN_BACKEND_CANN)
+        {
+            if (ksize != 2)
+            {
+                CV_LOG_WARNING(NULL, "CANN backend only supports Conv2D for now");
+                return false;
+            }
+            return true;
+        }
+#endif // HAVE_CANN
         return false;
     }
@@ -768,6 +780,68 @@ public:
         return Ptr<BackendNode>();
     }

+#ifdef HAVE_CANN
+    virtual Ptr<BackendNode> initCann(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(!blobs.empty());
+        CV_Assert(inputsWrapper.size() == 1);
+        CV_Assert(nodes.size() == 1);
+
+        bool has_bias = hasBias() || fusedBias;
+
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+        const int x_in_channel = x->host->size[1];
+        const int filter_out_channel = blobs[0].size[1];
+        const int groups = x_in_channel / filter_out_channel;
+
+        // create operator
+        std::string op_name = cv::format("conv2d_%d", index);
+        auto op = std::make_shared<ge::op::Conv2D>(op_name);
+
+        // set attributes
+        op->set_attr_strides(ge::Operator::OpListInt(
+            {1, 1, (int64_t)strides[0], (int64_t)strides[1]}
+        ));
+        op->set_attr_pads(ge::Operator::OpListInt(
+            {(int64_t)pads_begin[1], 
(int64_t)pads_end[1], (int64_t)pads_begin[0], (int64_t)pads_end[0]} + )); + op->set_attr_dilations(ge::Operator::OpListInt( + {1, 1, (int64_t)dilations[0], (int64_t)dilations[1]} + )); + op->set_attr_groups(groups); + op->set_attr_data_format("NCHW"); + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + // set inputs : weight + const Mat& w_mat = blobs[0]; + auto op_const_weight = std::make_shared(w_mat.data, w_mat.type(), shape(w_mat), cv::format("%s_w", op_name.c_str())); + op->set_input_filter(*(op_const_weight->getOp())); + op->update_input_desc_filter(*(op_const_weight->getTensorDesc())); + // set inputs : bias + if (has_bias) + { + int out_channel = blobs[0].size[0]; + Mat b_mat({out_channel}, CV_32F, &biasvec[0]); + + std::vector bias_shape{out_channel}; + auto op_const_bias = std::make_shared(b_mat.data, b_mat.type(), bias_shape, cv::format("%s_b", op_name.c_str())); + op->set_input_bias(*(op_const_bias->getOp())); + op->update_input_desc_bias(*(op_const_bias->getTensorDesc())); + } + + // set outputs + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif + #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector > &inputs, diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 01c369e5fd..a4b71ddddf 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -48,6 +48,7 @@ #include "../ie_ngraph.hpp" #include "../op_vkcom.hpp" #include "../op_webnn.hpp" +#include "../op_cann.hpp" #include #include @@ -186,6 +187,12 @@ public: return Ptr(); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + return func.initCannOp(inputsWrapper, index, nodes); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE @@ -350,7 +357,8 @@ struct ReLUFunctor : public BaseFunctor return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || - backendId == DNN_BACKEND_VKCOM; + backendId == DNN_BACKEND_VKCOM || + backendId == DNN_BACKEND_CANN; } void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const @@ -450,6 +458,42 @@ struct ReLUFunctor : public BaseFunctor } #endif // HAVE_HALIDE +#ifdef HAVE_CANN + Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + { + auto x = inputsWrapper[0].dynamicCast(); + auto op_x = nodes[0].dynamicCast()->getOp(); + auto x_desc = x->getTensorDesc(); + + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + if (slope) + { + std::string op_name = cv::format("leakyrelu_%d", index); + auto op = std::make_shared(op_name); + + op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + + op->set_attr_negative_slope(slope); + + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } + + std::string op_name = cv::format("relu_%d", index); + auto op = std::make_shared(op_name); // FIXIT: Relu6? 
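+        // Note: slope == 0 falls through to this plain Relu op; the LeakyRelu branch above already handled slope != 0.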
+        op->set_input_x_by_name(*op_x, "y");
+        op->update_input_desc_x(*x_desc);
+
+        op->update_output_desc_y(*output_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif
+
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -525,7 +569,8 @@ struct ReLU6Functor : public BaseFunctor
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                backendId == DNN_BACKEND_HALIDE ||
-               backendId == DNN_BACKEND_WEBNN;
+               backendId == DNN_BACKEND_WEBNN ||
+               backendId == DNN_BACKEND_CANN;
     }

     void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const
@@ -607,6 +652,37 @@ struct ReLU6Functor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE

+#ifdef HAVE_CANN
+    Ptr<BackendNode> initCannOp(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+    {
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+
+        std::string op_name = cv::format("clip_%d", index);
+        auto op = std::make_shared<ge::op::ClipByValue>(op_name);
+
+        auto op_x = nodes[0].dynamicCast<CannBackendNode>()->getOp();
+        op->set_input_x_by_name(*op_x, "y");
+        auto x_desc = x->getTensorDesc();
+        op->update_input_desc_x(*x_desc);
+
+        Mat min_value_mat(1, 1, CV_32F, Scalar(minValue));
+        std::vector<int> shape_{1};
+        auto op_const_minv = std::make_shared<CannConstOp>(min_value_mat.data, min_value_mat.type(), shape_, cv::format("%s_min_value", op_name.c_str()));
+        op->set_input_clip_value_min(*(op_const_minv->getOp()));
+        op->update_input_desc_clip_value_min(*(op_const_minv->getTensorDesc()));
+
+        Mat max_value_mat(1, 1, CV_32F, Scalar(maxValue));
+        auto op_const_maxv = std::make_shared<CannConstOp>(max_value_mat.data, max_value_mat.type(), shape_, cv::format("%s_max_value", op_name.c_str()));
+        op->set_input_clip_value_max(*(op_const_maxv->getOp()));
+        op->update_input_desc_clip_value_max(*(op_const_maxv->getTensorDesc()));
+
+        auto output_desc = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
+        op->update_output_desc_y(*output_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif
+
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -728,6 +804,12 @@ struct BaseDefaultFunctor : public BaseFunctor
     }
 #endif  // HAVE_HALIDE

+#ifdef HAVE_CANN
+    Ptr<BackendNode> initCannOp(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+    {
+        CV_Error(Error::StsNotImplemented, "");
+    }
+#endif // HAVE_CANN

 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -767,7 +849,8 @@ struct TanHFunctor : public BaseDefaultFunctor
 #endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE;
+               backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_CANN;
     }

     inline float calculate(float x) const
@@ -790,6 +873,26 @@
     }
 #endif  // HAVE_HALIDE

+#ifdef HAVE_CANN
+    Ptr<BackendNode> initCannOp(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+    {
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+
+        std::string op_name = cv::format("tanh_%d", index);
+        auto op = std::make_shared<ge::op::Tanh>(op_name);
+
+        auto op_x = nodes[0].dynamicCast<CannBackendNode>()->getOp();
+        op->set_input_x_by_name(*op_x, "y");
+        auto x_desc = x->getTensorDesc();
+        op->update_input_desc_x(*x_desc);
+
+        auto output_desc = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
+        op->update_output_desc_y(*output_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif // HAVE_CANN
+
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -811,7 +914,9 @@ struct 
SwishFunctor : public BaseDefaultFunctor { return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || + backendId == DNN_BACKEND_CANN; } inline float calculate(float x) const @@ -834,6 +939,28 @@ struct SwishFunctor : public BaseDefaultFunctor } #endif // HAVE_HALIDE +#ifdef HAVE_CANN + Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + { + auto x = inputsWrapper[0].dynamicCast(); + + std::string op_name = cv::format("swish_%d", index); + auto op = std::make_shared(op_name); + + op->set_attr_scale(1.0f); + + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN + #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) { @@ -856,7 +983,9 @@ struct MishFunctor : public BaseDefaultFunctor { return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || + backendId == DNN_BACKEND_CANN; } inline float calculate(float x) const @@ -887,6 +1016,26 @@ struct MishFunctor : public BaseDefaultFunctor } #endif // HAVE_HALIDE +#ifdef HAVE_CANN + Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + { + auto x = inputsWrapper[0].dynamicCast(); + + std::string op_name = cv::format("mish_%d", index); + auto op = std::make_shared(op_name); + + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN + #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) { @@ -918,7 +1067,8 @@ struct SigmoidFunctor : public BaseDefaultFunctor #endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_HALIDE; + backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_CANN; } inline float calculate(float x) const @@ -941,6 +1091,25 @@ struct SigmoidFunctor : public BaseDefaultFunctor } #endif // HAVE_HALIDE +#ifdef HAVE_CANN + Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + { + auto x = inputsWrapper[0].dynamicCast(); + + std::string op_name = cv::format("sigmoid_%d", index); + auto op = std::make_shared(op_name); + + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) @@ -970,7 +1139,8 @@ struct ELUFunctor : public BaseDefaultFunctor #endif return 
backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE;
+               backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_CANN;
     }

     inline float calculate(float x) const
@@ -998,6 +1168,28 @@ struct ELUFunctor : public BaseDefaultFunctor
     }
 #endif  // HAVE_HALIDE

+#ifdef HAVE_CANN
+    Ptr<BackendNode> initCannOp(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+    {
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+
+        std::string op_name = cv::format("elu_%d", index);
+        auto op = std::make_shared<ge::op::Elu>(op_name);
+
+        op->set_attr_alpha(alpha);
+
+        auto op_x = nodes[0].dynamicCast<CannBackendNode>()->getOp();
+        op->set_input_x_by_name(*op_x, "y");
+        auto x_desc = x->getTensorDesc();
+        op->update_input_desc_x(*x_desc);
+
+        auto output_desc = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
+        op->update_output_desc_y(*output_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif // HAVE_CANN
+
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
     {
@@ -1023,7 +1215,8 @@ struct AbsValFunctor : public BaseDefaultFunctor
 #endif
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE;
+               backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_CANN;
     }

     inline float calculate(float x) const
@@ -1046,6 +1239,25 @@ struct AbsValFunctor : public BaseDefaultFunctor
     }
 #endif  // HAVE_HALIDE

+#ifdef HAVE_CANN
+    Ptr<BackendNode> initCannOp(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+    {
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+
+        std::string op_name = cv::format("abs_%d", index);
+        auto op = std::make_shared<ge::op::Abs>(op_name);
+
+        auto op_x = nodes[0].dynamicCast<CannBackendNode>()->getOp();
+        op->set_input_x_by_name(*op_x, "y");
+        auto x_desc = x->getTensorDesc();
+        op->update_input_desc_x(*x_desc);
+
+        auto output_desc = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
+        op->update_output_desc_y(*output_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif // HAVE_CANN

 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const std::shared_ptr<ngraph::Node>& node)
@@ -1071,7 +1283,8 @@ struct BNLLFunctor : public BaseDefaultFunctor
     {
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
-               backendId == DNN_BACKEND_HALIDE;
+               backendId == DNN_BACKEND_HALIDE ||
+               backendId == DNN_BACKEND_CANN;
     }

     inline float calculate(float x) const
@@ -1087,6 +1300,26 @@ struct BNLLFunctor : public BaseDefaultFunctor
     }
 #endif

+#ifdef HAVE_CANN
+    Ptr<BackendNode> initCannOp(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+    {
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+
+        std::string op_name = cv::format("bnll_%d", index);
+        auto op = std::make_shared<ge::op::BNLL>(op_name);
+
+        auto op_x = nodes[0].dynamicCast<CannBackendNode>()->getOp();
+        op->set_input_x_by_name(*op_x, "y");
+        auto x_desc = x->getTensorDesc();
+        op->update_input_desc_x(*x_desc);
+
+        auto output_desc = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
+        op->update_output_desc_y(*output_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif // HAVE_CANN
+
 #ifdef HAVE_HALIDE
     void attachHalide(const Halide::Expr& input, Halide::Func& top)
     {
@@ -1123,6 +1356,26 @@ struct CeilFunctor : public BaseDefaultFunctor
     }
 #endif

+#ifdef HAVE_CANN
+    Ptr<BackendNode> initCannOp(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes)
+    {
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+
+        std::string op_name = cv::format("ceil_%d", index);
+        auto op = std::make_shared<ge::op::Ceil>(op_name);
+
+        auto op_x = 
nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN + #ifdef HAVE_HALIDE void attachHalide(const Halide::Expr& input, Halide::Func& top) { @@ -1143,7 +1396,10 @@ struct FloorFunctor : public BaseDefaultFunctor bool supportBackend(int backendId, int) { - return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE; + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_CUDA || + backendId == DNN_BACKEND_HALIDE || + backendId == DNN_BACKEND_CANN; } inline float calculate(float x) const @@ -1158,6 +1414,26 @@ struct FloorFunctor : public BaseDefaultFunctor } #endif +#ifdef HAVE_CANN + Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + { + auto x = inputsWrapper[0].dynamicCast(); + + std::string op_name = cv::format("floor_%d", index); + auto op = std::make_shared(op_name); + + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN + #ifdef HAVE_HALIDE void attachHalide(const Halide::Expr& input, Halide::Func& top) { @@ -1992,6 +2268,12 @@ struct PowerFunctor : public BaseFunctor } #endif // HAVE_HALIDE +#ifdef HAVE_CANN + Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + { + CV_Error(Error::StsNotImplemented, ""); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) @@ -2240,6 +2522,31 @@ struct ChannelsPReLUFunctor : public BaseFunctor } #endif // HAVE_HALIDE +#ifdef HAVE_CANN + Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + { + auto x = inputsWrapper[0].dynamicCast(); + auto op_x = nodes[0].dynamicCast()->getOp(); + auto x_desc = x->getTensorDesc(); + + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + std::string op_name = cv::format("prelu_%d", index); + auto op = std::make_shared(op_name); + + op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + + std::vector shape_{scale.size[0]}; // scale should be a 1d of shape [n] tensor, and it is a 2d mat of shape [n, 1] in opencv + auto op_const_slope = std::make_shared(scale.data, scale.type(), shape_, cv::format("%s_weight", op_name.c_str())); + op->set_input_weight(*(op_const_slope->getOp())); + op->update_input_desc_weight(*(op_const_slope->getTensorDesc())); + + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index a67b0c4bb5..24a87bcc17 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -46,6 +46,8 @@ #include "../op_halide.hpp" #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" +#include "../op_cann.hpp" + #include #ifdef HAVE_OPENCL @@ -169,6 +171,11 @@ public: 
return channelsMode == ELTWISE_CHANNNELS_SAME; #endif +#ifdef HAVE_CANN + if (backendId == DNN_BACKEND_CANN) + return channelsMode == ELTWISE_CHANNNELS_SAME && coeffs.empty(); +#endif + if (backendId == DNN_BACKEND_CUDA) { if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE) @@ -841,6 +848,47 @@ public: return Ptr(); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + CV_Assert(inputsWrapper.size() == 2); + CV_Assert(nodes.size() == 2); + + auto op_x1 = nodes[0].dynamicCast()->getOp(); + auto x1 = inputsWrapper[0].dynamicCast(); + auto x1_desc = x1->getTensorDesc(); + auto op_x2 = nodes[1].dynamicCast()->getOp(); + auto x2 = inputsWrapper[1].dynamicCast(); + auto x2_desc = x2->getTensorDesc(); + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + std::shared_ptr eltwise_operator = nullptr; + // add, mul, div, max, min + switch (op) + { +#define BUILD_CANN_ELTWISE_OP(op_type, class_name, op_name) \ + case op_type: { \ + auto eltwise_op = \ + std::make_shared(op_name); \ + eltwise_op->set_input_x1_by_name(*op_x1, "y"); \ + eltwise_op->set_input_x2_by_name(*op_x2, "y"); \ + eltwise_op->update_input_desc_x1(*x1_desc); \ + eltwise_op->update_input_desc_x2(*x2_desc); \ + eltwise_op->update_output_desc_y(*output_desc); \ + eltwise_operator = eltwise_op; \ + } break; + BUILD_CANN_ELTWISE_OP(SUM, Add, cv::format("add_%d", index)); + BUILD_CANN_ELTWISE_OP(PROD, Mul, cv::format("mul_%d", index)); + BUILD_CANN_ELTWISE_OP(DIV, Xdivy, cv::format("div_%d", index)); + BUILD_CANN_ELTWISE_OP(MAX, Maximum, cv::format("max_%d", index)); + BUILD_CANN_ELTWISE_OP(MIN, Minimum, cv::format("min_%d", index)); +#undef BUILD_CANN_ELTWISE_OP + default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); + } + + return Ptr(new CannBackendNode(eltwise_operator)); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index b3f57dc7cd..ff30da3a11 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -45,6 +45,7 @@ #include "../op_cuda.hpp" #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" +#include "../op_cann.hpp" #include #include @@ -77,7 +78,8 @@ public: return true; #endif return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_CUDA; + backendId == DNN_BACKEND_CUDA || + backendId == DNN_BACKEND_CANN; } bool getMemoryShapes(const std::vector &inputs, @@ -173,6 +175,33 @@ public: } } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + auto x_desc = x->getTensorDesc(); + auto op_x = nodes[0].dynamicCast()->getOp(); + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + std::string op_name = cv::format("flatten_%d", index); + auto op = std::make_shared(op_name); + + // set attributes + int num_axes = x->host->dims; + int start_axis = normalize_axis(_startAxis, num_axes); + int end_axis = normalize_axis(_endAxis, num_axes); + op->set_attr_axis(start_axis); + op->set_attr_end_axis(end_axis); + + // set inputs + op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + // set outputs + op->update_output_desc_y(*output_desc); + + return 
Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif

 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 505ea460d5..539c083399 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -47,6 +47,7 @@
 #include "../op_inf_engine.hpp"
 #include "../ie_ngraph.hpp"
 #include "../op_webnn.hpp"
+#include "../op_cann.hpp"

 #include
@@ -184,7 +185,8 @@ public:
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_CUDA && !tranAorB) ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) ||
-               (backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB);
+               (backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) ||
+               backendId == DNN_BACKEND_CANN;
     }

     virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -660,6 +662,65 @@ public:
         return Ptr<BackendNode>();
     }

+#ifdef HAVE_CANN
+    virtual Ptr<BackendNode> initCann(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto x1 = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+        auto x1_desc = x1->getTensorDesc();
+        auto op_x1 = nodes[0].dynamicCast<CannBackendNode>()->getOp();
+        auto output_desc = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
+
+        std::string op_name = cv::format("matmul_%d", index);
+        auto op = std::make_shared<ge::op::MatMulV2>(op_name);
+
+        if (!blobs.empty()) // if B is const
+        {
+            // set attributes
+            op->set_attr_transpose_x1(false);
+            // weightMat always needs to be transposed, since CPU backend
+            // implementation is input * weight.im2row
+            op->set_attr_transpose_x2(true);
+
+            // set inputs
+            // set inputs : x2 (weight)
+            auto op_const_weight = std::make_shared<CannConstOp>(weightsMat.data, weightsMat.type(), shape(weightsMat), cv::format("%s_w", op_name.c_str()));
+            op->set_input_x2_by_name(*(op_const_weight->getOp()), "y");
+            op->update_input_desc_x2(*(op_const_weight->getTensorDesc()));
+        }
+        else
+        {
+            // A and B are variable inputs; non-const bias is not considered
+            CV_Assert(inputsWrapper.size() == 2);
+            CV_Assert(nodes.size() == 2);
+
+            // set attributes
+            op->set_attr_transpose_x1(transA);
+            op->set_attr_transpose_x2(transB);
+
+            // set inputs : x2 (weight)
+            auto op_x2 = nodes[1].dynamicCast<CannBackendNode>()->getOp();
+            auto x2_desc = inputsWrapper[1].dynamicCast<CannBackendWrapper>()->getTensorDesc();
+            op->set_input_x2_by_name(*op_x2, "y");
+            op->update_input_desc_x2(*x2_desc);
+        }
+
+        // set inputs
+        // set inputs : x1 (input)
+        op->set_input_x1_by_name(*op_x1, "y");
+        op->update_input_desc_x1(*x1_desc);
+        // set inputs : bias (bias)
+        auto bias_mat = bias ? biasMat : Mat::zeros(1, weightsMat.size[0], weightsMat.type());
+        std::vector<int> bias_shape{weightsMat.size[0]};
+        auto op_const_bias = std::make_shared<CannConstOp>(bias_mat.data, bias_mat.type(), bias_shape, cv::format("%s_b", op_name.c_str()));
+        op->set_input_bias(*(op_const_bias->getOp()));
+        op->update_input_desc_bias(*(op_const_bias->getTensorDesc()));
+
+        // set outputs
+        op->update_output_desc_y(*output_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif

 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp
index 6c3a654159..f012a91730 100644
--- a/modules/dnn/src/layers/lrn_layer.cpp
+++ b/modules/dnn/src/layers/lrn_layer.cpp
@@ -47,6 +47,7 @@
 #include "../op_inf_engine.hpp"
 #include "../ie_ngraph.hpp"
 #include "../op_vkcom.hpp"
+#include "../op_cann.hpp"

 #include "opencv2/imgproc.hpp"
 #include "opencv2/dnn/shape_utils.hpp"
@@ -106,7 +107,8 @@ public:
         return backendId == DNN_BACKEND_OPENCV ||
                backendId == DNN_BACKEND_CUDA ||
                backendId == DNN_BACKEND_HALIDE ||
-               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM));
+               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM)) ||
+               backendId == DNN_BACKEND_CANN;
     }

 #ifdef HAVE_OPENCL
@@ -442,6 +444,38 @@ public:
 #endif  // HAVE_HALIDE
     }

+#ifdef HAVE_CANN
+    virtual Ptr<BackendNode> initCann(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto x = inputsWrapper[0].dynamicCast<CannBackendWrapper>();
+
+        // create operator
+        std::string op_name = cv::format("lrn_%d", index);
+        auto op = std::make_shared<ge::op::LRN>(op_name);
+
+        // set attributes
+        op->set_attr_depth_radius(size);
+        op->set_attr_bias(bias);
+        op->set_attr_alpha(alpha);
+        op->set_attr_beta(beta);
+        op->set_attr_norm_region("ACROSS_CHANNELS");
+        if (type == SPATIAL_NRM)
+            op->set_attr_norm_region("WITHIN_CHANNEL");
+
+        // set inputs
+        // set inputs : x
+        auto op_x = nodes[0].dynamicCast<CannBackendNode>()->getOp();
+        op->set_input_x_by_name(*op_x, "y");
+        auto x_desc = x->getTensorDesc();
+        op->update_input_desc_x(*x_desc);
+
+        // set outputs
+        auto output_y_desc = std::make_shared<ge::TensorDesc>(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT);
+        op->update_output_desc_y(*output_y_desc);
+
+        return Ptr<BackendNode>(new CannBackendNode(op));
+    }
+#endif // HAVE_CANN

 #ifdef HAVE_DNN_NGRAPH
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp
index ecd8b7351a..6850cd5211 100644
--- a/modules/dnn/src/layers/nary_eltwise_layers.cpp
+++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp
@@ -5,6 +5,8 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_cuda.hpp"
+#include "../op_cann.hpp"
+
 #include
 #include
@@ -97,6 +99,11 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
+#ifdef HAVE_CANN
+        if (backendId == DNN_BACKEND_CANN)
+            return op == OPERATION::ADD || op == OPERATION::PROD || op == OPERATION::DIV ||
+                   op == OPERATION::MAX || op == OPERATION::MIN;
+#endif
         if (op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM ||
             op == OPERATION::PROD || op == OPERATION::DIV)
             return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
@@ -682,6 +689,48 @@ public:
     }
 #endif

+#ifdef HAVE_CANN
+    virtual Ptr<BackendNode> initCann(const std::vector<Ptr<BackendWrapper> > &inputsWrapper, const int index, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
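+        // Note: as in the eltwise_layer.cpp builder above, only the two-input case is mapped;
+        // an N-input op would need a chain of binary operators.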
CV_Assert(inputsWrapper.size() == 2); + CV_Assert(nodes.size() == 2); + + auto op_x1 = nodes[0].dynamicCast()->getOp(); + auto x1 = inputsWrapper[0].dynamicCast(); + auto x1_desc = x1->getTensorDesc(); + auto op_x2 = nodes[1].dynamicCast()->getOp(); + auto x2 = inputsWrapper[1].dynamicCast(); + auto x2_desc = x2->getTensorDesc(); + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + std::shared_ptr eltwise_operator = nullptr; + // add, mul, div, max, min + switch (op) + { +#define BUILD_CANN_ELTWISE_OP(op_type, class_name, op_name) \ + case op_type: { \ + auto eltwise_op = \ + std::make_shared(op_name); \ + eltwise_op->set_input_x1_by_name(*op_x1, "y"); \ + eltwise_op->set_input_x2_by_name(*op_x2, "y"); \ + eltwise_op->update_input_desc_x1(*x1_desc); \ + eltwise_op->update_input_desc_x2(*x2_desc); \ + eltwise_op->update_output_desc_y(*output_desc); \ + eltwise_operator = eltwise_op; \ + } break; + BUILD_CANN_ELTWISE_OP(OPERATION::ADD, Add, cv::format("add_%d", index)); + BUILD_CANN_ELTWISE_OP(OPERATION::PROD, Mul, cv::format("mul_%d", index)); + BUILD_CANN_ELTWISE_OP(OPERATION::DIV, Xdivy, cv::format("div_%d", index)); + BUILD_CANN_ELTWISE_OP(OPERATION::MAX, Maximum, cv::format("max_%d", index)); + BUILD_CANN_ELTWISE_OP(OPERATION::MIN, Minimum, cv::format("min_%d", index)); +#undef BUILD_CANN_ELTWISE_OP + default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); + } + + return Ptr(new CannBackendNode(eltwise_operator)); + } +#endif // HAVE_CANN + virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index aea8ab3168..359c82a1a3 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -15,6 +15,7 @@ Implementation of padding layer, which adds paddings to input blob. 
#include "../op_halide.hpp" #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" +#include "../op_cann.hpp" #include @@ -113,7 +114,8 @@ public: #endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - (backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4); + (backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4) || + backendId == DNN_BACKEND_CANN; } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE @@ -219,6 +221,50 @@ public: return Ptr(); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + + // create operator + std::string op_name = cv::format("pad_%d", index); + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_mode(paddingType.c_str()); + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + // set inputs : paddings + std::vector pads; + for (int i = 0; i < paddings.size(); i++) + { + pads.push_back(paddings[i].first); + pads.push_back(paddings[i].second); + } + std::vector pads_shape{(int)pads.size()}; + Mat paddings_mat(pads_shape, CV_32S, &pads[0]); + auto op_const_paddings = std::make_shared(paddings_mat.data, paddings_mat.type(), pads_shape, cv::format("%s_paddings", op_name.c_str())); + op->set_input_paddings(*(op_const_paddings->getOp())); + op->update_input_desc_paddings(*(op_const_paddings->getTensorDesc())); + // set inputs : constant_values + std::vector constant_values_shape{1}; + Mat constant_values_mat(1, 1, CV_32F, Scalar(paddingValue)); + auto op_const_constant_values = std::make_shared(constant_values_mat.data, constant_values_mat.type(), constant_values_shape, cv::format("%s_constant_values", op_name.c_str())); + op->set_input_constant_values(*(op_const_constant_values->getOp())); + op->update_input_desc_constant_values(*(op_const_constant_values->getTensorDesc())); + + // set outputs + auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_y_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index cce36b951f..1aee12d7ae 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -48,6 +48,7 @@ #include "../op_vkcom.hpp" #include "../op_webnn.hpp" #include "../op_timvx.hpp" +#include "../op_cann.hpp" #include #include @@ -143,7 +144,8 @@ public: return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_WEBNN || - (backendId == DNN_BACKEND_VKCOM && haveVulkan()); + (backendId == DNN_BACKEND_VKCOM && haveVulkan()) || + backendId == DNN_BACKEND_CANN; } bool getMemoryShapes(const std::vector &inputs, @@ -438,6 +440,34 @@ public: } } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + + // create operator + std::string op_name = cv::format("permute_%d", index); + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_order(ge::Operator::OpListInt( + 
_order.begin(), _order.end() + )); + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + + // set outputs + auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_y_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 9a808a7c67..9b9ced468f 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -47,6 +47,7 @@ #include "../op_halide.hpp" #include "../op_inf_engine.hpp" #include "../op_webnn.hpp" +#include "../op_cann.hpp" #ifdef HAVE_DNN_NGRAPH #include "../ie_ngraph.hpp" @@ -199,6 +200,12 @@ public: { return type == MAX || type == AVE || type == ROI; } +#ifdef HAVE_CANN + if (backendId == DNN_BACKEND_CANN) + { + return type == MAX || type == AVE; + } +#endif #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { @@ -540,6 +547,82 @@ public: return Ptr(); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + auto op_x = nodes[0].dynamicCast()->getOp(); + auto x_desc = x->getTensorDesc(); + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + std::string op_name_base = cv::format("pooling_%d", index); + if (type == MAX) + { + std::string op_name = cv::format("max_%s", op_name_base.c_str()); + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_ksize(ge::Operator::OpListInt( + {1, 1, (int64_t)kernel_size[0], (int64_t)kernel_size[1]} + )); + op->set_attr_strides(ge::Operator::OpListInt( + {1, 1, (int64_t)strides[0], (int64_t)strides[1]} + )); + std::string cann_pad_mode{"CALCULATED"}; + if (padMode == "SAME" || padMode == "VALID") + cann_pad_mode = padMode; + op->set_attr_padding_mode(cann_pad_mode.c_str()); + op->set_attr_pads(ge::Operator::OpListInt( + {(int64_t)pads_begin[0], (int64_t)pads_end[0], (int64_t)pads_begin[1], (int64_t)pads_end[1]} + )); + op->set_attr_data_format("NCHW"); + op->set_attr_global_pooling(globalPooling); + op->set_attr_ceil_mode(ceilMode); + + // set inputs + op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + // set outputs + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } + else if (type == AVE) + { + std::string op_name = cv::format("avg_%s", op_name_base.c_str()); + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_ksize(ge::Operator::OpListInt( + {1, 1, (int64_t)kernel_size[0], (int64_t)kernel_size[1]} + )); + op->set_attr_strides(ge::Operator::OpListInt( + {1, 1, (int64_t)strides[0], (int64_t)strides[1]} + )); + std::string cann_pad_mode{"CALCULATED"}; + if (padMode == "SAME" || padMode == "VALID") + cann_pad_mode = padMode; + op->set_attr_padding_mode(cann_pad_mode.c_str()); + op->set_attr_pads(ge::Operator::OpListInt( + {(int64_t)pads_begin[0], (int64_t)pads_end[0], (int64_t)pads_begin[1], (int64_t)pads_end[1]} + )); + op->set_attr_global_pooling(globalPooling); + op->set_attr_ceil_mode(ceilMode); + auto cann_exclusive = !avePoolPaddedArea; + op->set_attr_exclusive(cann_exclusive); + + // set inputs + 
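+            // Feed the producer node's "y" output into this op's x input; the input
+            // description is then refreshed from the wrapper's TensorDesc.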
op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + // set outputs + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } + else + CV_Error(Error::StsNotImplemented, "Unsupported pooling type"); + } +#endif #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index 290effd380..3ff8a225b7 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -47,6 +47,7 @@ #include "../ie_ngraph.hpp" #include "../op_webnn.hpp" #include "../op_timvx.hpp" +#include "../op_cann.hpp" #include @@ -163,8 +164,8 @@ public: ReshapeLayerImpl(const LayerParams& params) { setParamsFrom(params); - int axis = params.get("axis", 0); - int numAxes = params.get("num_axes", -1); + axis = params.get("axis", 0); + numAxes = params.get("num_axes", -1); hasDynamicShapes = params.get("has_dynamic_shapes", false); shapesInitialized = !hasDynamicShapes; @@ -224,7 +225,8 @@ public: #endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_WEBNN; + backendId == DNN_BACKEND_WEBNN || + backendId == DNN_BACKEND_CANN; } bool getMemoryShapes(const std::vector &inputs, @@ -324,6 +326,39 @@ public: } } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + + // create operator + std::string op_name = cv::format("reshape_%d", index); + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_axis(axis); + op->set_attr_num_axes(numAxes); + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + // set inputs : shape + std::vector shape_of_shape{(int)newShapeDesc.size()}; + Mat shape_mat(shape_of_shape, CV_32S, newShapeDesc.data()); + auto op_const_shape = std::make_shared(shape_mat.data, shape_mat.type(), shape_of_shape, cv::format("%s_shape", op_name.c_str())); + op->set_input_shape(*(op_const_shape->getOp())); + op->update_input_desc_shape(*(op_const_shape->getTensorDesc())); + + // set outputs + auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_y_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, @@ -464,6 +499,8 @@ public: } private: + int axis; + int numAxes; std::vector outShapes; std::vector dynamicShapes; // Which axes shapes are dynamic and require reinitialization with new input std::vector inputIndices; // Which axes from input are needed to compute correct output shape diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index ab640dbf3f..356b193ab5 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -8,6 +8,7 @@ #include "layers_common.hpp" #include "../op_cuda.hpp" #include "../op_inf_engine.hpp" +#include "../op_cann.hpp" #include #ifdef HAVE_DNN_NGRAPH @@ -77,6 +78,9 @@ public: if (backendId == DNN_BACKEND_CUDA) return interpolation == "nearest" || interpolation == "bilinear" || interpolation == "opencv_linear"; + if (backendId == DNN_BACKEND_CANN) + return interpolation == "nearest" || interpolation == "bilinear" || 
interpolation == "opencv_linear"; + #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { @@ -307,6 +311,67 @@ public: CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + auto x_desc = x->getTensorDesc(); + auto op_x = nodes[0].dynamicCast()->getOp(); + auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + + // create operator + std::string op_name = cv::format("resize_%d", index); + + if (interpolation == "nearest") + { + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_align_corners(alignCorners); + op->set_attr_half_pixel_centers(halfPixelCenters); + + // set inputs : x + op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + // set inputs : size + std::vector shape_of_size_mat{2}; + Mat size_mat(2, 1, CV_32S, Scalar(outHeight, outWidth)); + auto op_const_size = std::make_shared(size_mat.data, size_mat.type(), shape_of_size_mat, cv::format("%s_size", op_name.c_str())); + op->set_input_size(*(op_const_size->getOp())); + op->update_input_desc_size(*(op_const_size->getTensorDesc())); + + // set outputs + op->update_output_desc_y(*output_y_desc); + + return Ptr(new CannBackendNode(op)); + } + else if (interpolation == "opencv_linear" || interpolation == "bilinear") + { + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_align_corners(alignCorners); + op->set_attr_half_pixel_centers(halfPixelCenters); + + // set inputs : x + op->set_input_x_by_name(*op_x, "y"); + op->update_input_desc_x(*x_desc); + // set inputs : size + std::vector shape_of_size_mat{2}; + Mat size_mat(2, 1, CV_32S, Scalar(outHeight, outWidth)); + auto op_const_size = std::make_shared(size_mat.data, size_mat.type(), shape_of_size_mat, cv::format("%s_size", op_name.c_str())); + op->set_input_size(*(op_const_size->getOp())); + op->update_input_desc_size(*(op_const_size->getTensorDesc())); + + // set outputs + op->update_output_desc_y(*output_y_desc); + + return Ptr(new CannBackendNode(op)); + } + else + CV_Error(Error::StsNotImplemented, "Unsupported interpolation by CANN backend: " + interpolation); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index aa44e4a5b9..d646b6c058 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -44,6 +44,7 @@ #include "../op_cuda.hpp" #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" +#include "../op_cann.hpp" #include "layers_common.hpp" #include @@ -198,7 +199,7 @@ public: if (backendId == DNN_BACKEND_CUDA) return !hasSteps; #endif - return backendId == DNN_BACKEND_OPENCV; + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CANN; } bool getMemoryShapes(const std::vector &inputs, @@ -589,6 +590,66 @@ public: } } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + CV_Assert(sliceRanges.size() == 1); + CV_Assert(sliceSteps.size() == 1); + CV_Assert(sliceRanges[0].size() == sliceSteps[0].size()); + + auto x = inputsWrapper[0].dynamicCast(); + const int dims = x->host->dims; + + // create operator + std::string op_name = cv::format("slice_%d", index); + auto op = 
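+        // The loop below flattens sliceRanges/sliceSteps into the four index
+        // arrays the CANN strided-slice operator consumes. A worked instance,
+        // with hypothetical sliceRanges[0] = {Range(0, 1), Range(2, 5)} and
+        // sliceSteps[0] = {1, 2}:
+        //   begins = {0, 2}, ends = {1, 5}, axes = {0, 1}, steps = {1, 2}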
std::make_shared(op_name); + + // retrieve begins, ends, axes and steps + std::vector begins, ends, axes, steps; + for (int i = 0; i < sliceRanges[0].size(); i++) + { + begins.push_back(sliceRanges[0][i].start); + ends.push_back(sliceRanges[0][i].end); + axes.push_back(i); + steps.push_back(sliceSteps[0][i]); + } + std::vector shape_{dims}; + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + // set inputs : begin + Mat begin_mat(shape_, CV_32S, &begins[0]); + auto op_const_begin = std::make_shared(begin_mat.data, begin_mat.type(), shape_, cv::format("%s_begin", op_name.c_str())); + op->set_input_begin(*(op_const_begin->getOp())); + op->update_input_desc_begin(*(op_const_begin->getTensorDesc())); + // set inputs : end + Mat end_mat(shape_, CV_32S, &ends[0]); + auto op_const_end = std::make_shared(end_mat.data, end_mat.type(), shape_, cv::format("%s_end", op_name.c_str())); + op->set_input_end(*(op_const_end->getOp())); + op->update_input_desc_end(*(op_const_end->getTensorDesc())); + // set inputs : axes + Mat axes_mat(shape_, CV_32S, &axes[0]); + auto op_const_axes = std::make_shared(axes_mat.data, axes_mat.type(), shape_, cv::format("%s_axes", op_name.c_str())); + op->set_input_axes(*(op_const_axes->getOp())); + op->update_input_desc_axes(*(op_const_axes->getTensorDesc())); + // set inputs : strides + Mat strides_mat(shape_, CV_32S, &steps[0]); + auto op_const_strides = std::make_shared(strides_mat.data, strides_mat.type(), shape_, cv::format("%s_strides", op_name.c_str())); + op->set_input_strides(*(op_const_strides->getOp())); + op->update_input_desc_strides(*(op_const_strides->getTensorDesc())); + + // set outputs + auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_desc); + + return Ptr(new CannBackendNode(op)); + } +#endif + #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index b10aef3453..c1ea4d2297 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -48,6 +48,7 @@ #include "../ie_ngraph.hpp" #include "../op_vkcom.hpp" #include "../op_webnn.hpp" +#include "../op_cann.hpp" #include #include @@ -116,7 +117,8 @@ public: return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) || - (backendId == DNN_BACKEND_VKCOM && haveVulkan()); + (backendId == DNN_BACKEND_VKCOM && haveVulkan()) || + backendId == DNN_BACKEND_CANN; } #ifdef HAVE_OPENCL @@ -362,6 +364,34 @@ public: return Ptr(); } +#ifdef HAVE_CANN + virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + { + auto x = inputsWrapper[0].dynamicCast(); + + // create operator + std::string op_name = cv::format("softmax_%d", index); + auto op = std::make_shared(op_name); + + // set attributes + op->set_attr_axes(ge::Operator::OpListInt( + {(int64_t)axisRaw} + )); + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, "y"); + auto x_desc = x->getTensorDesc(); + op->update_input_desc_x(*x_desc); + + // set outputs + auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_output_desc_y(*output_y_desc); + + return Ptr(new 
CannBackendNode(op)); + } +#endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/legacy_backend.cpp b/modules/dnn/src/legacy_backend.cpp index 431c597fab..b092fb057c 100644 --- a/modules/dnn/src/legacy_backend.cpp +++ b/modules/dnn/src/legacy_backend.cpp @@ -13,6 +13,7 @@ #include "op_cuda.hpp" #include "op_webnn.hpp" #include "op_timvx.hpp" +#include "op_cann.hpp" namespace cv { namespace dnn { @@ -115,6 +116,10 @@ Ptr wrapMat(int backendId, int targetId, cv::Mat& m) return Ptr(new TimVXBackendWrapper(m)); #endif // HAVE_TIMVX } + else if (backendId == DNN_BACKEND_CANN) + { + CV_Assert(0 && "Internal error: DNN_BACKEND_CANN must be implemented through inheritance"); + } else CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); return Ptr(); // TODO Error? diff --git a/modules/dnn/src/net_cann.cpp b/modules/dnn/src/net_cann.cpp new file mode 100644 index 0000000000..62d45d85c5 --- /dev/null +++ b/modules/dnn/src/net_cann.cpp @@ -0,0 +1,348 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include + +#include "net_impl.hpp" + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +#ifdef HAVE_CANN + +static std::shared_ptr compileCannGraph(std::shared_ptr graph); + +class NetImplCann CV_FINAL : public Net::Impl +{ +public: + typedef Net::Impl Base; + + bool newWasSupported, netWasConverted; + + explicit NetImplCann(const Ptr& basePtr) + : Net::Impl() + { + CV_LOG_INFO(NULL, "Initializing NetImplCann"); + basePtr_ = basePtr; + newWasSupported = true; + netWasConverted = false; + + init(); + + CV_LOG_INFO(NULL, "Finished initializing NetImplCann"); + } + + void init() + { + CV_TRACE_FUNCTION(); + CV_Assert(basePtr_); + Net::Impl& base = *basePtr_; + CV_Assert(!base.netWasAllocated); + CV_Assert(!base.netWasQuantized); // does not support quantized net for now + netInputLayer = base.netInputLayer; + blobsToKeep = base.blobsToKeep; + layers = base.layers; + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + LayerData& ld = it->second; + ld.resetAllocation(); + } + layerNameToId = base.layerNameToId; + outputNameToId = base.outputNameToId; + preferableBackend = DNN_BACKEND_CANN; + preferableTarget = DNN_TARGET_NPU; // force using NPU + hasDynamicShapes = base.hasDynamicShapes; + CV_Assert(base.backendWrappers.empty()); //backendWrappers = base.backendWrappers; + lastLayerId = base.lastLayerId; + netWasAllocated = base.netWasAllocated; + netWasQuantized = base.netWasQuantized; + fusion = base.fusion; + } + + bool empty() const override + { + return Base::empty(); + } + + void setPreferableBackend(Net& net, int backendId) override + { + if (backendId == preferableBackend) + return; // no-op + else + CV_Error(Error::StsError, "DNN: Can't switch backend from CANN to other"); + Ptr& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net); + impl_ptr_ref = basePtr_; + basePtr_->setPreferableBackend(net, backendId); + } + + void setPreferableTarget(int targetId) override + { + if (targetId != preferableTarget) + { + CV_Error(Error::StsError, "DNN: Can't switch target from NPU to other"); + } + } + + Ptr wrap(Mat& host) override + { + return Ptr(new CannBackendWrapper(host)); + } + + // void fuseLayers(const std::vector& blobsToKeep_); // fusion is done in the CANN graph engine + + void initBackend(const 
std::vector<LayerPin>& blobsToKeep_) override;
+
+    void forwardLayer(LayerData& ld) override;
+};
+
+void NetImplCann::initBackend(const std::vector<LayerPin>& blobsToKeep_)
+{
+    CV_TRACE_FUNCTION();
+    CV_CheckEQ(preferableBackend, DNN_BACKEND_CANN, "");
+
+    // netWasAllocated turns to false if requested output is changed or input shape changes
+    if (netWasConverted && netWasAllocated)
+        return;
+
+    if (!netWasConverted)
+    {
+        newWasSupported = true;
+        for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
+        {
+            auto& ld = it->second;
+            auto layer = ld.layerInstance;
+            if (ld.id != 0 && !layer->supportBackend(preferableBackend))
+            {
+                newWasSupported = false;
+                CV_LOG_INFO(NULL, "DNN/CANN: layer (name=" << ld.name << ", type=" << ld.type << ") is not supported by the CANN backend. Falling back to CPU backend");
+            }
+        }
+    }
+    if (!newWasSupported)
+        return;
+
+    // convert layers to CANN operators,
+    // collect graph input and output operators,
+    // collect input and output wrappers
+    int firstOutputLayerId = -1;
+    std::vector<Ptr<BackendNode> > netInputNodes;
+    std::vector<ge::Operator> graphInputOps, graphOutputOps;
+    std::vector<Ptr<BackendWrapper> > graphInputWrappers, graphOutputWrappers;
+    CV_LOG_INFO(NULL, "DNN/CANN: converting layers to CANN operators");
+    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
+    {
+        LayerData& ld = it->second;
+        auto layer = ld.layerInstance;
+
+        if (ld.id == 0)
+        {
+            for (int i = 0; i < ld.outputBlobsWrappers.size(); i++)
+            {
+                std::string inputName = netInputLayer->outNames.empty() ? cv::format("%s_%d", ld.name.c_str(), i) : netInputLayer->outNames[i];
+                auto inputOp = std::make_shared<ge::op::Data>(inputName);
+
+                // retrieve tensor description
+                auto wrapper = ld.outputBlobsWrappers[i];
+                graphInputWrappers.push_back(wrapper);
+                auto cannWrapper = wrapper.dynamicCast<CannBackendWrapper>();
+                CV_Assert(!cannWrapper.empty());
+
+                inputOp->update_input_desc_x(*(cannWrapper->desc_));
+                inputOp->update_output_desc_y(*(cannWrapper->desc_));
+
+                graphInputOps.push_back(*inputOp);
+                netInputNodes.push_back(Ptr<BackendNode>(new CannBackendNode(inputOp)));
+            }
+        }
+        else
+        {
+            ld.skip = true; // skip all CANN operators
+
+            std::vector<Ptr<BackendNode> > layerInputNodes;
+            for (int i = 0; i < ld.inputBlobsId.size(); i++)
+            {
+                int layerInputLid = ld.inputBlobsId[i].lid;
+                int layerInputOid = ld.inputBlobsId[i].oid;
+                if (layerInputLid == 0)
+                {
+                    layerInputNodes.push_back(netInputNodes[layerInputOid]);
+                }
+                else // here we do not consider an op with multiple outputs
+                {
+                    layerInputNodes.push_back(layers[layerInputLid].backendNodes[preferableBackend]);
+                }
+            }
+
+            CV_LOG_INFO(NULL, "DNN/CANN: converting layer " << ld.name << "@" << ld.type << "@" << ld.id << " to CANN operator");
+            auto backendNode = layer->initCann(ld.inputBlobsWrappers, ld.id, layerInputNodes);
+
+            // collect outputs
+            bool isOutputNode = ld.consumers.empty();
+            if (isOutputNode)
+            {
+                if (firstOutputLayerId < 0)
+                    firstOutputLayerId = ld.id;
+                auto cannNode = backendNode.dynamicCast<CannBackendNode>();
+                graphOutputOps.push_back(*(cannNode->getOp()));
+                // assume CANN graph outputs and dnn net outputs have the same order
+                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
+                {
+                    graphOutputWrappers.push_back(ld.outputBlobsWrappers[i]);
+                }
+            }
+
+            ld.backendNodes[preferableBackend] = backendNode;
+        }
+    }
+    CV_LOG_INFO(NULL, "DNN/CANN: done converting layers to CANN operators");
+
+    // build graph from collected graph inputs and outputs
+    CV_LOG_INFO(NULL, "DNN/CANN: building ge::Graph");
+    std::string graphName = cv::format("graph_%d", 0);
+    std::shared_ptr<ge::Graph> graph = std::make_shared<ge::Graph>(graphName.c_str());
+    (void)graph->SetInputs(graphInputOps);
+    (void)graph->SetOutputs(graphOutputOps);
+    CV_LOG_INFO(NULL, "DNN/CANN: done building ge::Graph");
+
+    // convert ge::Graph to OM buffer
+    CV_LOG_INFO(NULL, "DNN/CANN: converting ge::Graph to OM buffer");
+    std::shared_ptr<ge::ModelBufferData> modelBuffer = compileCannGraph(graph);
+    CV_LOG_INFO(NULL, "DNN/CANN: OM buffer size = " << modelBuffer->length);
+    CV_LOG_INFO(NULL, "DNN/CANN: done converting ge::Graph to OM buffer");
+
+    // keep net in the first output node and mark the node runnable
+    auto& ld = layers[firstOutputLayerId];
+    auto cannNode = ld.backendNodes[preferableBackend].dynamicCast<CannBackendNode>();
+    std::shared_ptr<CannNet> net = std::shared_ptr<CannNet>(new CannNet());
+    net->loadModelBuffer(modelBuffer);
+    net->bindInputWrappers(graphInputWrappers);
+    net->bindOutputWrappers(graphOutputWrappers);
+    cannNode->net = net;
+    ld.skip = false;
+
+    netWasConverted = true;
+}
+
+void NetImplCann::forwardLayer(LayerData& ld)
+{
+    CV_TRACE_FUNCTION();
+
+    auto layer = ld.layerInstance;
+
+    if (!ld.skip)
+    {
+        auto it = ld.backendNodes.find(preferableBackend);
+        if (ld.id == 0 || it == ld.backendNodes.end()) // input layer
+        {
+            return Base::forwardLayer(ld);
+        }
+
+        CV_Assert(it != ld.backendNodes.end());
+        const Ptr<BackendNode>& node = it->second;
+        CV_Assert(!node.empty());
+        auto cannNode = node.dynamicCast<CannBackendNode>();
+        CV_Assert(!cannNode.empty());
+        CV_Assert(cannNode->net);
+
+        TickMeter tm;
+        tm.start();
+
+        cannNode->net->forward();
+
+        tm.stop();
+        int64_t t = tm.getTimeTicks();
+        layersTimings[ld.id] = (t > 0) ? t : 1;
+    }
+    else
+    {
+        layersTimings[ld.id] = 0;
+    }
+
+    ld.flag = 1;
+}
+
+std::shared_ptr<ge::ModelBufferData> compileCannGraph(std::shared_ptr<ge::Graph> graph)
+{
+    const size_t hdrsize = 32;
+    std::shared_ptr<ge::ModelBufferData> out_buffer = std::make_shared<ge::ModelBufferData>();
+    size_t buf_size = (1 << 27), model_size; // default buf_size 128 MB
+    for (int iter = 0; iter < 2; ++iter)
+    {
+        size_t* shared_buf = (size_t*)mmap(NULL, buf_size + hdrsize, PROT_READ|PROT_WRITE,
+                                           MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+        uint8_t* model_data = (uint8_t*)(shared_buf + 1);
+        pid_t child;
+        int childstate = 0;
+        bool ok;
+        if ((child = fork()) == 0)
+        {
+            // initialize engine
+            std::map<ge::AscendString, ge::AscendString> options = {
+                {ge::AscendString(ge::ir_option::SOC_VERSION), ge::AscendString("Ascend310")},
+            };
+            ACL_CHECK_GRAPH_RET(ge::aclgrphBuildInitialize(options));
+
+            // build
+            std::shared_ptr<ge::ModelBufferData> om_model = std::make_shared<ge::ModelBufferData>();
+            std::map<ge::AscendString, ge::AscendString> build_options;
+            ACL_CHECK_GRAPH_RET(aclgrphBuildModel(*graph, build_options, *om_model));
+
+#if 0
+            // (optional). Dump model
+            ge::AscendString graph_name;
+            graph->GetName(graph_name);
+            aclgrphDumpGraph(*graph, graph_name.GetString(), 7);
+            // (optional). Save model
+            aclgrphSaveModel(graph_name.GetString(), *om_model);
+#endif
+
+            // finalize engine
+            ge::aclgrphBuildFinalize();
+
+            // send model from child to parent
+            size_t model_size = om_model->length;
+            *shared_buf = model_size;
+            if (model_size > buf_size)
+            {
+                exit(1);
+            }
+            else
+            {
+                memcpy(model_data, om_model->data.get(), model_size);
+                exit(0);
+            }
+        }
+        waitpid(child, &childstate, 0);
+        model_size = *shared_buf;
+        ok = WIFEXITED(childstate) && WEXITSTATUS(childstate) == 0;
+        if (ok)
+        {
+            CV_LOG_INFO(NULL, "Compile success, model size = " << model_size);
+            out_buffer->data = std::shared_ptr<uint8_t>(new uint8_t[model_size]);
+            memcpy(out_buffer->data.get(), model_data, model_size);
+            out_buffer->length = model_size;
+        }
+        munmap(shared_buf, buf_size + hdrsize);
+        if (ok) break;
+        buf_size = model_size;
+    }
+    return out_buffer;
+}
+
+void switchToCannBackend(Net& net)
+{
+    CV_TRACE_FUNCTION();
+    Ptr<Net::Impl>& impl_ptr_ref = accessor::DnnNetAccessor::getImplPtrRef(net);
+    CV_Assert(impl_ptr_ref);
+    CV_LOG_INFO(NULL, "DNN: switching to CANN backend... (networkID=" << impl_ptr_ref->networkId << ")");
+    Ptr<NetImplCann> impl_ptr_cann = makePtr<NetImplCann>(impl_ptr_ref);
+    impl_ptr_ref = impl_ptr_cann;
+}
+
+#endif // HAVE_CANN
+
+CV__DNN_INLINE_NS_END
+}} // namespace cv::dnn
diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp
index 5411051484..dc0c53191f 100644
--- a/modules/dnn/src/net_impl.cpp
+++ b/modules/dnn/src/net_impl.cpp
@@ -518,8 +518,8 @@ void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes)
     for (int i = 0; i < ld.outputBlobs.size(); ++i)
         ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]);

-    /* CUDA backend has its own system for internal blobs; we don't need these */
-    ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA || preferableBackend == DNN_BACKEND_TIMVX) ? 0 : ld.internals.size());
+    /* CUDA & CANN backends have their own system for internal blobs; we don't need these */
+    ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA || preferableBackend == DNN_BACKEND_TIMVX || preferableBackend == DNN_BACKEND_CANN) ? 0 : ld.internals.size());
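+    // How compileCannGraph() above hands the model back (a sketch of the
+    // protocol, reconstructed from the code): the first sizeof(size_t) bytes of
+    // the MAP_SHARED | MAP_ANONYMOUS mapping carry the OM model size, the
+    // remaining bytes the model itself. The child always writes the size; if it
+    // exceeds buf_size (128 MB by default) the child exits with status 1 and the
+    // parent retries once with buf_size set to the reported size before copying
+    // the bytes out and unmapping.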
     for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i)
         ld.internalBlobsWrappers[i] = wrap(ld.internals[i]);
@@ -1566,6 +1566,7 @@ string Net::Impl::dump(bool forceAllocation) const
         case DNN_BACKEND_CUDA: backend = "CUDA/"; break;
         case DNN_BACKEND_WEBNN: backend = "WEBNN/"; break;
         case DNN_BACKEND_TIMVX: backend = "TIMVX/"; break;
+        case DNN_BACKEND_CANN: backend = "CANN/"; break;
             // don't use default:
     }
     out << "digraph G {\n";
diff --git a/modules/dnn/src/net_impl.hpp b/modules/dnn/src/net_impl.hpp
index 08ac1932ca..6eb06aa5b7 100644
--- a/modules/dnn/src/net_impl.hpp
+++ b/modules/dnn/src/net_impl.hpp
@@ -12,6 +12,7 @@
 #include "op_cuda.hpp"
 #include "op_webnn.hpp"
 #include "op_timvx.hpp"
+#include "op_cann.hpp"

 #include
 #include
diff --git a/modules/dnn/src/net_impl_backend.cpp b/modules/dnn/src/net_impl_backend.cpp
index 1d313c70c4..ef816be66d 100644
--- a/modules/dnn/src/net_impl_backend.cpp
+++ b/modules/dnn/src/net_impl_backend.cpp
@@ -85,6 +85,10 @@ Ptr<BackendWrapper> Net::Impl::wrap(Mat& host)
         return Ptr<BackendWrapper>(new TimVXBackendWrapper(baseBuffer, host));
 #endif
     }
+    else if (preferableBackend == DNN_BACKEND_CANN)
+    {
+        CV_Assert(0 && "Internal error: DNN_BACKEND_CANN must be implemented through inheritance");
+    }
     else
         CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
 }
@@ -146,6 +150,10 @@ void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
         CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of TimVX");
 #endif
     }
+    else if (preferableBackend == DNN_BACKEND_CANN)
+    {
+        CV_Assert(0 && "Internal error: DNN_BACKEND_CANN must be implemented through inheritance");
+    }
     else
     {
         CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
@@ -179,6 +187,14 @@ void Net::Impl::setPreferableBackend(Net& net, int backendId)
         networkBackend.switchBackend(net);
 #else
         CV_Error(Error::StsNotImplemented, "OpenVINO backend is not available in the current OpenCV build");
+#endif
+    }
+    else if (backendId == DNN_BACKEND_CANN)
+    {
+#ifdef HAVE_CANN
+        switchToCannBackend(net);
+#else
+        CV_Error(Error::StsNotImplemented, "CANN backend is not available in the current OpenCV build");
 #endif
     }
     else
diff --git a/modules/dnn/src/op_cann.cpp b/modules/dnn/src/op_cann.cpp
new file mode 100644
index 0000000000..6d8a57446b
--- /dev/null
+++ b/modules/dnn/src/op_cann.cpp
@@ -0,0 +1,329 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "precomp.hpp"
+#include "op_cann.hpp"
+
+#include
+#include
+#include // memcpy
+
+#include
+#include
+
+namespace cv { namespace dnn {
+
+#ifdef HAVE_CANN
+
+std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr;
+std::mutex AclEnvGuard::global_acl_env_mutex_;
+
+AclEnvGuard::AclEnvGuard()
+{
+    CV_LOG_INFO(NULL, "Start to initialize CANN");
+    ACL_CHECK_RET(aclInit(NULL));
+    CV_LOG_INFO(NULL, "[Success] initialized CANN");
+}
+
+AclEnvGuard::~AclEnvGuard()
+{
+    CV_LOG_INFO(NULL, "Start to finalize CANN");
+    ACL_CHECK_RET(aclFinalize());
+    CV_LOG_INFO(NULL, "[Success] finalized CANN");
+}
+
+std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv()
+{
+    std::shared_ptr<AclEnvGuard> acl_env;
+
+    std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
+    acl_env = global_acl_env_;
+    if (acl_env != nullptr)
+    {
+        CV_LOG_INFO(NULL, "CANN has been initialized. 
Skipping..."); + } + else + { + acl_env = std::make_shared(); + global_acl_env_ = acl_env; + } + return acl_env; +} + +CannConstOp::CannConstOp(const uint8_t* data, const int dtype, const std::vector& shape, const std::string& name) +{ + std::vector shape_{shape.begin(), shape.end()}; + + auto ge_shape = ge::Shape(shape_); + auto ge_dtype = ge::DT_FLOAT; + switch (dtype) + { + case CV_32F: break; + case CV_32S: ge_dtype = ge::DT_INT32; break; + default: CV_Error(Error::StsNotImplemented, "Unsupported data type"); + } + auto size_of_type = sizeof(float); + switch (dtype) + { + case CV_32F: break; + case CV_32S: size_of_type = sizeof(int); break; + default: CV_Error(Error::StsNotImplemented, "Unsupported data type"); + } + desc_ = std::make_shared(ge_shape, ge::FORMAT_NCHW, ge_dtype); + auto ge_tensor = std::make_shared(); + ge_tensor->SetTensorDesc(*desc_); + ge_tensor->SetData(data, ge_shape.GetShapeSize() * size_of_type); + op_ = std::make_shared(name); + op_->set_attr_value(*ge_tensor); +} + +CannBackendNode::CannBackendNode(const std::shared_ptr& op) + : BackendNode(DNN_BACKEND_CANN), op_(op) { } + +std::shared_ptr CannBackendNode::getOp() { return op_; } + +CannBackendWrapper::CannBackendWrapper(const Mat& m) + : BackendWrapper(DNN_BACKEND_CANN, DNN_TARGET_NPU), host((Mat*)&m) +{ + auto mat_shape = shape(*host); + std::vector shape_{mat_shape.begin(), mat_shape.end()}; + + auto ge_shape = ge::Shape(shape_); + desc_ = std::make_shared(ge_shape, ge::FORMAT_NCHW, ge::DT_FLOAT); +} + +void CannBackendWrapper::copyToHost() +{ + CV_LOG_DEBUG(NULL, "Not implemented"); +} + +void CannBackendWrapper::setHostDirty() +{ + CV_LOG_DEBUG(NULL, "Not implemented"); +} + +CannNet::~CannNet() +{ + CV_LOG_INFO(NULL, "In ~CannNet, inputs = " << inputs << ", outputs = " << outputs); + if (!model_desc) + { + CV_LOG_INFO(NULL, "[Failed] Tried to deconstruct CannNet but model is not loaded"); + return; + } + // free datasets: inputs, outputs + if (inputs) + { + CV_LOG_INFO(NULL, "In ~CannNet: destroy inputs"); + destroyDataset(&inputs); + } + if (outputs) + { + CV_LOG_INFO(NULL, "In ~CannNet: destroy outputs"); + destroyDataset(&outputs); + } + // unload model + ACL_CHECK_RET(aclmdlUnload(model_id)); + // destroy model_desc + ACL_CHECK_RET(aclmdlDestroyDesc(model_desc)); + model_desc = nullptr; + CV_LOG_INFO(NULL, "[Success] Unloaded model (id=" << model_id << ")"); + + // destroy context + if (context != nullptr) + { + ACL_CHECK_RET(aclrtDestroyContext(context)); + context = nullptr; + } + // reset device + if (context == nullptr) + { + ACL_CHECK_RET(aclrtResetDevice(device_id)); + } +} + +bool CannNet::empty() const +{ + return (model_desc == nullptr); +} + +void CannNet::loadModelBuffer(std::shared_ptr modelBuffer) +{ + model.clear(); + model.resize(modelBuffer->length); + std::memcpy(reinterpret_cast(model.data()), + reinterpret_cast(modelBuffer->data.get()), + modelBuffer->length); + loadToDevice(); +} + +void CannNet::bindInputWrappers(const std::vector>& inputWrappers) +{ + CV_Assert(inputWrappers.size() == getInputNum()); + for (size_t i = 0; i < inputWrappers.size(); ++i) + { + auto wrapper = inputWrappers[i].dynamicCast(); + + // verify size + aclmdlIODims model_dims; + ACL_CHECK_RET(aclmdlGetInputDims(model_desc, i, &model_dims)); + CV_CheckEQ((int)model_dims.dimCount, wrapper->host->dims, "Dimension of input does not match with model's requirement"); + for (size_t j = 0; j < model_dims.dimCount; ++j) + CV_CheckEQ((int)model_dims.dims[j], wrapper->host->size[j], "Size of input does not match with 
model's requirement"); + + input_wrappers.push_back(wrapper); + } +} + +void CannNet::bindOutputWrappers(const std::vector>& outputWrappers) +{ + CV_Assert(outputWrappers.size() == getOutputNum()); + for (int i = 0; i < outputWrappers.size(); ++i) + { + auto wrapper = outputWrappers[i].dynamicCast(); + + // verify size + aclmdlIODims model_dims; + ACL_CHECK_RET(aclmdlGetOutputDims(model_desc, i, &model_dims)); + CV_CheckEQ((int)model_dims.dimCount, wrapper->host->dims, "Dimension of input does not match with model's requirement"); + for (size_t j = 0; j < model_dims.dimCount; ++j) + CV_CheckEQ((int)model_dims.dims[j], wrapper->host->size[j], "Size of input does not match with model's requirement"); + + output_wrappers.push_back(wrapper); + } +} + +void CannNet::forward() +{ + // send inputs from host to device + CV_LOG_DEBUG(NULL, "DNN/CANN: start sending inputs to device"); + for (size_t i = 0; i < input_wrappers.size(); ++i) + { + const void* p_host = (const void*)input_wrappers[i]->host->data; + + auto db = aclmdlGetDatasetBuffer(inputs, i); + auto p_device = aclGetDataBufferAddr(db); + auto db_size = aclGetDataBufferSizeV2(db); + + ACL_CHECK_RET(aclrtMemcpy(p_device, db_size, p_host, db_size, ACL_MEMCPY_HOST_TO_DEVICE)); + } + CV_LOG_DEBUG(NULL, "DNN/CANN: finished sending inputs to device"); + + // forward + CV_LOG_DEBUG(NULL, "DNN/CANN: start network forward"); + ACL_CHECK_RET(aclrtSetCurrentContext(context)); + ACL_CHECK_RET(aclmdlExecute(model_id, inputs, outputs)); + CV_LOG_DEBUG(NULL, "DNN/CANN: finished network forward"); + + // fetch ouputs from device to host + CV_LOG_DEBUG(NULL, "DNN/CANN: start fetching outputs to host"); + for (size_t i = 0; i < output_wrappers.size(); ++i) + { + void* p_host = (void*)output_wrappers[i]->host->data; + + auto db = aclmdlGetDatasetBuffer(outputs, i); + auto p_device = aclGetDataBufferAddr(db); + auto db_size = aclGetDataBufferSizeV2(db); + + ACL_CHECK_RET(aclrtMemcpy(p_host, db_size, p_device, db_size, ACL_MEMCPY_DEVICE_TO_HOST)); + } + CV_LOG_DEBUG(NULL, "DNN/CANN: finish fetching outputs to host"); +} + +size_t CannNet::getInputNum() const +{ + return aclmdlGetNumInputs(model_desc); +} + +size_t CannNet::getOutputNum() const +{ + return aclmdlGetNumOutputs(model_desc); +} + +void CannNet::init() +{ + ACL_CHECK_RET(aclrtSetDevice(device_id)); + ACL_CHECK_RET(aclrtCreateContext(&context, device_id)); +} + +void CannNet::loadToDevice() +{ + if (model_desc != nullptr) + { + CV_LOG_INFO(NULL, "Model has been loaded to device. 
Skipping ..."); + return; + } + + CV_LOG_INFO(NULL, "Load model to NPU memory"); + ACL_CHECK_RET(aclmdlLoadFromMem(reinterpret_cast(model.data()), model.size(), &model_id)); + + CV_LOG_INFO(NULL, "Create model description"); + model_desc = aclmdlCreateDesc(); + ACL_CHECK_RET(aclmdlGetDesc(model_desc, model_id)); + + createInputDataset(); + createOutputDataset(); +} + +void CannNet::createInputDataset() +{ + inputs = aclmdlCreateDataset(); + size_t n_inputs = aclmdlGetNumInputs(model_desc); + size_t length; + for (size_t i = 0; i < n_inputs; i++) + { + length = aclmdlGetInputSizeByIndex(model_desc, i); + CV_LOG_INFO(NULL, "length = " << length); + void* p_device = nullptr; + ACL_CHECK_RET(aclrtMalloc(&p_device, length, ACL_MEM_MALLOC_NORMAL_ONLY)); + auto p_data_buffer = aclCreateDataBuffer(p_device, length); + ACL_CHECK_RET(aclmdlAddDatasetBuffer(inputs, p_data_buffer)); + } +} + +void CannNet::createOutputDataset() +{ + outputs = aclmdlCreateDataset(); + size_t n_outputs = aclmdlGetNumOutputs(model_desc); + size_t length; + for (size_t i = 0; i < n_outputs; i++) + { + length = aclmdlGetOutputSizeByIndex(model_desc, i); + void* p_device = nullptr; + ACL_CHECK_RET(aclrtMalloc(&p_device, length, ACL_MEM_MALLOC_NORMAL_ONLY)); + auto p_data_buffer = aclCreateDataBuffer(p_device, length); + ACL_CHECK_RET(aclmdlAddDatasetBuffer(outputs, p_data_buffer)); + } +} + +void CannNet::destroyDataset(aclmdlDataset** dataset) +{ + if (!dataset) + { + CV_LOG_INFO(NULL, "CANN dataset is not initialized"); + return; + } + auto buffer_count = aclmdlGetDatasetNumBuffers(*dataset); + CV_LOG_INFO(NULL, "buffer_count = " << buffer_count); + for (auto i = 0; i < buffer_count; i++) + { + auto data_buffer = aclmdlGetDatasetBuffer(*dataset, i); + auto p_device = aclGetDataBufferAddr(data_buffer); + if (p_device) + { + ACL_CHECK_RET(aclrtFree(p_device)); // 107000? + } + else + { + CV_LOG_INFO(NULL, "Data buffer (i=" << i << ") from ACL dataset is invalid"); + } + ACL_CHECK_RET(aclDestroyDataBuffer(data_buffer)); + } + ACL_CHECK_RET(aclmdlDestroyDataset(*dataset)); + *dataset = nullptr; + CV_LOG_INFO(NULL, "[Success] Destroyed dataset"); +} + +#endif // HAVE_CANN + +}} // namespace cv::dnn diff --git a/modules/dnn/src/op_cann.hpp b/modules/dnn/src/op_cann.hpp new file mode 100644 index 0000000000..2237dd4855 --- /dev/null +++ b/modules/dnn/src/op_cann.hpp @@ -0,0 +1,164 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DNN_OP_CANN_HPP +#define OPENCV_DNN_OP_CANN_HPP + +#ifdef HAVE_CANN +#include "acl/acl.h" // acl* functions +#include "graph/graph.h" // ge::Graph; ge::Operator from operator.h +#include "graph/ge_error_codes.h" // GRAPH_SUCCESS, ... + +#include "op_proto/built-in/inc/all_ops.h" // ge::Conv2D, ... +#include "graph/tensor.h" // ge::Shape, ge::Tensor, ge::TensorDesc +#include "graph/types.h" // DT_FLOAT, ... ; FORMAT_NCHW, ... 
+ +#include "ge/ge_api_types.h" // ge::ir_option::SOC_VERSION +#include "ge/ge_ir_build.h" // build graph + +// for fork() +#include +#include +#include +#include +#include + +#endif // HAVE_CANN + +#include + +#ifdef HAVE_CANN +#define ACL_CHECK_RET(f) \ +{ \ + if (f != ACL_SUCCESS) \ + { \ + CV_LOG_ERROR(NULL, "CANN check failed, ret = " << f); \ + CV_Error(Error::StsError, "CANN check failed"); \ + } \ +} +#define ACL_CHECK_GRAPH_RET(f) \ +{ \ + if (f != ge::GRAPH_SUCCESS) \ + { \ + CV_LOG_ERROR(NULL, "CANN graph check failed, ret = " << f); \ + CV_Error(Error::StsError, "CANN graph check failed"); \ + } \ +} + +#endif + +namespace cv { namespace dnn { + +#ifdef HAVE_CANN + +CV__DNN_INLINE_NS_BEGIN + +void switchToCannBackend(Net& net); + +CV__DNN_INLINE_NS_END + + class CannNet; + + class AclEnvGuard { + public: + explicit AclEnvGuard(); + ~AclEnvGuard(); + static std::shared_ptr GetAclEnv(); + + private: + static std::shared_ptr global_acl_env_; + static std::mutex global_acl_env_mutex_; + }; + + class CannConstOp + { + public: + CannConstOp(const uint8_t* data, const int dtype, const std::vector& shape, const std::string& name); + std::shared_ptr getOp() { return op_; } + std::shared_ptr getTensorDesc() { return desc_; } + private: + std::shared_ptr op_; + std::shared_ptr desc_; + }; + + class CannBackendNode : public BackendNode + { + public: + CannBackendNode(const std::shared_ptr& op); + std::shared_ptr getOp(); + std::shared_ptr net; + private: + std::shared_ptr op_; + }; + + class CannBackendWrapper : public BackendWrapper + { + public: + CannBackendWrapper(const Mat& m); + ~CannBackendWrapper() { } + + std::shared_ptr getTensorDesc() { return desc_; } + + virtual void copyToHost() CV_OVERRIDE; + + virtual void setHostDirty() CV_OVERRIDE; + + Mat* host; + std::shared_ptr desc_; + }; + + class CannNet + { + public: + explicit CannNet(int deviceId = 0) + : device_id(deviceId) + { + init(); + acl_env = AclEnvGuard::GetAclEnv(); + } + ~CannNet(); // release private members + + bool empty() const; + + void loadModelBuffer(std::shared_ptr modelBuffer); + + void bindInputWrappers(const std::vector>& inputWrappers); + void bindOutputWrappers(const std::vector>& outputWrappers); + + void forward(); + + size_t getInputNum() const; + size_t getOutputNum() const; + + private: + void init(); + + void loadToDevice(); // call aclInit before this API is called + void createInputDataset(); + void createOutputDataset(); + + int getOutputIndexByName(const std::string& name); + + void destroyDataset(aclmdlDataset** dataset); + + std::shared_ptr acl_env; + + std::vector> input_wrappers; + std::vector> output_wrappers; + + uint32_t model_id{0}; + aclmdlDesc* model_desc{nullptr}; + std::vector model; + aclmdlDataset* inputs{nullptr}; + aclmdlDataset* outputs{nullptr}; + + int device_id{0}; + aclrtContext context{nullptr}; + }; + +#endif // HAVE_CANN + +}} // namespace cv::dnn + +#endif // OPENCV_DNN_OP_CANN_HPP diff --git a/modules/dnn/src/registry.cpp b/modules/dnn/src/registry.cpp index 56b96f4c4c..f5c9e584c6 100644 --- a/modules/dnn/src/registry.cpp +++ b/modules/dnn/src/registry.cpp @@ -11,6 +11,7 @@ #include "op_cuda.hpp" #include "op_webnn.hpp" #include "op_timvx.hpp" +#include "op_cann.hpp" #include "halide_scheduler.hpp" @@ -122,6 +123,10 @@ private: backends.push_back(std::make_pair(DNN_BACKEND_TIMVX, DNN_TARGET_NPU)); } #endif + +#ifdef HAVE_CANN + backends.push_back(std::make_pair(DNN_BACKEND_CANN, DNN_TARGET_NPU)); +#endif } BackendsList backends; diff --git 
a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index e36374bd98..df93e50c91 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -49,6 +49,7 @@ #define CV_TEST_TAG_DNN_SKIP_PARSER "dnn_skip_parser" #define CV_TEST_TAG_DNN_SKIP_TIMVX "dnn_skip_timvx" +#define CV_TEST_TAG_DNN_SKIP_CANN "dnn_skip_cann" #ifdef HAVE_INF_ENGINE #if INF_ENGINE_VER_MAJOR_EQ(2018050000) @@ -139,7 +140,8 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget bool withVkCom = true, bool withCUDA = true, bool withNgraph = true, - bool withWebnn = true + bool withWebnn = true, + bool withCann = true ); testing::internal::ParamGenerator< tuple > dnnBackendsAndTargetsIE(); diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index 5fdf6c3d1e..bad6a8d082 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -31,6 +31,7 @@ void PrintTo(const cv::dnn::Backend& v, std::ostream* os) case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: *os << "NGRAPH"; return; case DNN_BACKEND_WEBNN: *os << "WEBNN"; return; case DNN_BACKEND_TIMVX: *os << "TIMVX"; return; + case DNN_BACKEND_CANN: *os << "CANN"; return; } // don't use "default:" to emit compiler warnings *os << "DNN_BACKEND_UNKNOWN(" << (int)v << ")"; } @@ -251,7 +252,8 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget bool withVkCom /*= true*/, bool withCUDA /*= true*/, bool withNgraph /*= true*/, - bool withWebnn /*= false*/ + bool withWebnn /*= false*/, + bool withCann /*= true*/ ) { bool withVPU = validateVPUType(); @@ -311,6 +313,16 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget CV_UNUSED(withWebnn); #endif +#ifdef HAVE_CANN + if (withCann) + { + for (auto target : getAvailableTargets(DNN_BACKEND_CANN)) + targets.push_back(make_tuple(DNN_BACKEND_CANN, target)); + } +#else + CV_UNUSED(withCann); +#endif // HAVE_CANN + { available = getAvailableTargets(DNN_BACKEND_OPENCV); for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i) @@ -477,6 +489,11 @@ void initDNNTests() registerGlobalSkipTag( CV_TEST_TAG_DNN_SKIP_TIMVX ); +#endif +#ifdef HAVE_CANN + registerGlobalSkipTag( + CV_TEST_TAG_DNN_SKIP_CANN + ); #endif registerGlobalSkipTag( CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE,
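+// End-to-end usage sketch for the new backend (illustrative only; "model.onnx"
+// and blob are placeholders, not artifacts from this patch):
+//
+//   cv::dnn::Net net = cv::dnn::readNet("model.onnx");
+//   net.setPreferableBackend(cv::dnn::DNN_BACKEND_CANN); // swaps Net::Impl for NetImplCann
+//   net.setPreferableTarget(cv::dnn::DNN_TARGET_NPU);    // the only target the CANN backend accepts
+//   net.setInput(blob);
+//   cv::Mat out = net.forward();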