Open Source Computer Vision Library https://opencv.org/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

196 lines
7.5 KiB

if(WINRT)
ocv_module_disable(dnn)
endif()
if(NOT HAVE_PROTOBUF)
ocv_module_disable(opencv_dnn)
endif()
set(the_description "Deep neural network module. It allows to load models from different frameworks and to make forward pass")
ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX)
ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java objc js)
ocv_option(OPENCV_DNN_OPENCL "Build with OpenCL support" HAVE_OPENCL AND NOT APPLE)
if(HAVE_TENGINE)
add_definitions(-DHAVE_TENGINE=1)
endif()
if(OPENCV_DNN_OPENCL AND HAVE_OPENCL)
add_definitions(-DCV_OCL4DNN=1)
endif()
ocv_option(OPENCV_DNN_CUDA "Build with CUDA support"
HAVE_CUDA
AND HAVE_CUBLAS
AND HAVE_CUDNN
)
if(OPENCV_DNN_CUDA)
if(HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
add_definitions(-DCV_CUDA4DNN=1)
else()
if(NOT HAVE_CUDA)
message(SEND_ERROR "DNN: CUDA backend requires CUDA Toolkit. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
elseif(NOT HAVE_CUBLAS)
message(SEND_ERROR "DNN: CUDA backend requires cuBLAS. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
elseif(NOT HAVE_CUDNN)
message(SEND_ERROR "DNN: CUDA backend requires cuDNN. Please resolve dependency or disable OPENCV_DNN_CUDA=OFF")
endif()
endif()
endif()
ocv_cmake_hook_append(INIT_MODULE_SOURCES_opencv_dnn "${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake")
if(MSVC)
add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 )
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146
/wd4305 /wd4127 /wd4100 /wd4512 /wd4125 /wd4389 /wd4510 /wd4610
/wd4702 /wd4456 /wd4457 /wd4065 /wd4310 /wd4661 /wd4506
)
else()
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-deprecated -Wmissing-prototypes -Wmissing-declarations -Wshadow
-Wunused-parameter -Wsign-compare
)
endif()
if(HAVE_CUDA)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
endif()
if(NOT HAVE_CXX11)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-undef) # LANG_CXX11 from protobuf files
endif()
if(APPLE_FRAMEWORK)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshorten-64-to-32)
endif()
if(ANDROID)
add_definitions(-DDISABLE_POSIX_MEMALIGN -DTH_DISABLE_HEAP_TRACKING)
endif()
if(NOT BUILD_PROTOBUF)
add_definitions(-DOPENCV_DNN_EXTERNAL_PROTOBUF=1)
endif()
add_definitions(-DHAVE_PROTOBUF=1)
#suppress warnings in autogenerated caffe.pb.* files
ocv_warnings_disable(CMAKE_CXX_FLAGS
/wd4125 /wd4267 /wd4127 /wd4244 /wd4512 /wd4702
/wd4456 /wd4510 /wd4610 /wd4800
/wd4701 /wd4703 # potentially uninitialized local/pointer variable 'value' used
/wd4505 # unreferenced local function has been removed
/wd4458 # declaration of 'x' hides class member. GCC still works, MSVC bug is here: https://developercommunity.visualstudio.com/content/problem/219311/c-c4458-declaration-hides-class-member-warning-iss.html
-wd858 -wd2196
-Winvalid-offsetof # Apple Clang (attr_value.pb.cc)
)
set(include_dirs "")
set(libs "")
if(PROTOBUF_UPDATE_FILES)
file(GLOB proto_files "${CMAKE_CURRENT_LIST_DIR}/src/tensorflow/*.proto" "${CMAKE_CURRENT_LIST_DIR}/src/caffe/opencv-caffe.proto" "${CMAKE_CURRENT_LIST_DIR}/src/onnx/opencv-onnx.proto")
set(PROTOBUF_GENERATE_CPP_APPEND_PATH ON) # required for tensorflow
protobuf_generate_cpp(fw_srcs fw_hdrs ${proto_files})
else()
file(GLOB fw_srcs "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.cc" "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.cc" "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.cc")
file(GLOB fw_hdrs "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow/*.h" "${CMAKE_CURRENT_LIST_DIR}/misc/caffe/opencv-caffe.pb.h" "${CMAKE_CURRENT_LIST_DIR}/misc/onnx/opencv-onnx.pb.h")
set(fw_inc "${CMAKE_CURRENT_LIST_DIR}/misc/caffe" "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow" "${CMAKE_CURRENT_LIST_DIR}/misc/onnx")
endif()
list(APPEND include_dirs ${fw_inc})
list(APPEND libs ${Protobuf_LIBRARIES})
if(NOT BUILD_PROTOBUF)
list(APPEND include_dirs ${Protobuf_INCLUDE_DIRS})
endif()
set(sources_options "")
list(APPEND libs ${LAPACK_LIBRARIES})
if(OPENCV_DNN_OPENCL AND HAVE_OPENCL)
list(APPEND include_dirs ${OPENCL_INCLUDE_DIRS})
else()
set(sources_options EXCLUDE_OPENCL)
endif()
if(OPENCV_DNN_CUDA AND HAVE_CUDA AND HAVE_CUBLAS AND HAVE_CUDNN)
list(APPEND include_dirs ${CUDA_TOOLKIT_INCLUDE} ${CUDNN_INCLUDE_DIRS})
Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low CUDA backend for the DNN module * stub cuda4dnn design * minor fixes for tests and doxygen * add csl public api directory to module headers * add low-level CSL components * add high-level CSL components * integrate csl::Tensor into backbone code * switch to CPU iff unsupported; otherwise, fail on error * add fully connected layer * add softmax layer * add activation layers * support arbitary rank TensorDescriptor * pass input wrappers to `initCUDA()` * add 1d/2d/3d-convolution * add pooling layer * reorganize and refactor code * fixes for gcc, clang and doxygen; remove cxx14/17 code * add blank_layer * add LRN layer * add rounding modes for pooling layer * split tensor.hpp into tensor.hpp and tensor_ops.hpp * add concat layer * add scale layer * add batch normalization layer * split math.cu into activations.cu and math.hpp * add eltwise layer * add flatten layer * add tensor transform api * add asymmetric padding support for convolution layer * add reshape layer * fix rebase issues * add permute layer * add padding support for concat layer * refactor and reorganize code * add normalize layer * optimize bias addition in scale layer * add prior box layer * fix and optimize normalize layer * add asymmetric padding support for pooling layer * add event API * improve pooling performance for some padding scenarios * avoid over-allocation of compute resources to kernels * improve prior box performance * enable layer fusion * add const layer * add resize layer * add slice layer * add padding layer * add deconvolution layer * fix channelwise ReLU initialization * add vector traits * add vectorized versions of relu, clipped_relu, power * add vectorized concat kernels * improve concat_with_offsets performance * vectorize scale and bias kernels * add support for multi-billion element tensors * vectorize prior box kernels * fix address alignment check * improve bias addition performance of conv/deconv/fc layers * restructure code for supporting multiple targets * add DNN_TARGET_CUDA_FP64 * add DNN_TARGET_FP16 * improve vectorization * add region layer * improve tensor API, add dynamic ranks 1. use ManagedPtr instead of a Tensor in backend wrapper 2. add new methods to tensor classes - size_range: computes the combined size of for a given axis range - tensor span/view can be constructed from a raw pointer and shape 3. the tensor classes can change their rank at runtime (previously rank was fixed at compile-time) 4. remove device code from tensor classes (as they are unused) 5. enforce strict conditions on tensor class APIs to improve debugging ability * fix parametric relu activation * add squeeze/unsqueeze tensor API * add reorg layer * optimize permute and enable 2d permute * enable 1d and 2d slice * add split layer * add shuffle channel layer * allow tensors of different ranks in reshape primitive * patch SliceOp to allow Crop Layer * allow extra shape inputs in reshape layer * use `std::move_backward` instead of `std::move` for insert in resizable_static_array * improve workspace management * add spatial LRN * add nms (cpu) to region layer * add max pooling with argmax ( and a fix to limits.hpp) * add max unpooling layer * rename DNN_TARGET_CUDA_FP32 to DNN_TARGET_CUDA * update supportBackend to be more rigorous * remove stray include from preventing non-cuda build * include op_cuda.hpp outside condition #if * refactoring, fixes and many optimizations * drop DNN_TARGET_CUDA_FP64 * fix gcc errors * increase max. tensor rank limit to six * add Interp layer * drop custom layers; use BackendNode * vectorize activation kernels * fixes for gcc * remove wrong assertion * fix broken assertion in unpooling primitive * fix build errors in non-CUDA build * completely remove workspace from public API * fix permute layer * enable accuracy and perf. tests for DNN_TARGET_CUDA * add asynchronous forward * vectorize eltwise ops * vectorize fill kernel * fixes for gcc * remove CSL headers from public API * remove csl header source group from cmake * update min. cudnn version in cmake * add numerically stable FP32 log1pexp * refactor code * add FP16 specialization to cudnn based tensor addition * vectorize scale1 and bias1 + minor refactoring * fix doxygen build * fix invalid alignment assertion * clear backend wrappers before allocateLayers * ignore memory lock failures * do not allocate internal blobs * integrate NVTX * add numerically stable half precision log1pexp * fix indentation, following coding style, improve docs * remove accidental modification of IE code * Revert "add asynchronous forward" This reverts commit 1154b9da9da07e9b52f8a81bdcea48cf31c56f70. * [cmake] throw error for unsupported CC versions * fix rebase issues * add more docs, refactor code, fix bugs * minor refactoring and fixes * resolve warnings/errors from clang * remove haveCUDA() checks from supportBackend() * remove NVTX integration * changes based on review comments * avoid exception when no CUDA device is present * add color code for CUDA in Net::dump
5 years ago
set(CC_LIST ${CUDA_ARCH_BIN})
separate_arguments(CC_LIST)
foreach(cc ${CC_LIST})
if(cc VERSION_LESS 3.0)
message(FATAL_ERROR "CUDA backend for DNN module requires CC 3.0 or higher. Please remove unsupported architectures from CUDA_ARCH_BIN option or disable OPENCV_DNN_CUDA=OFF.")
Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low CUDA backend for the DNN module * stub cuda4dnn design * minor fixes for tests and doxygen * add csl public api directory to module headers * add low-level CSL components * add high-level CSL components * integrate csl::Tensor into backbone code * switch to CPU iff unsupported; otherwise, fail on error * add fully connected layer * add softmax layer * add activation layers * support arbitary rank TensorDescriptor * pass input wrappers to `initCUDA()` * add 1d/2d/3d-convolution * add pooling layer * reorganize and refactor code * fixes for gcc, clang and doxygen; remove cxx14/17 code * add blank_layer * add LRN layer * add rounding modes for pooling layer * split tensor.hpp into tensor.hpp and tensor_ops.hpp * add concat layer * add scale layer * add batch normalization layer * split math.cu into activations.cu and math.hpp * add eltwise layer * add flatten layer * add tensor transform api * add asymmetric padding support for convolution layer * add reshape layer * fix rebase issues * add permute layer * add padding support for concat layer * refactor and reorganize code * add normalize layer * optimize bias addition in scale layer * add prior box layer * fix and optimize normalize layer * add asymmetric padding support for pooling layer * add event API * improve pooling performance for some padding scenarios * avoid over-allocation of compute resources to kernels * improve prior box performance * enable layer fusion * add const layer * add resize layer * add slice layer * add padding layer * add deconvolution layer * fix channelwise ReLU initialization * add vector traits * add vectorized versions of relu, clipped_relu, power * add vectorized concat kernels * improve concat_with_offsets performance * vectorize scale and bias kernels * add support for multi-billion element tensors * vectorize prior box kernels * fix address alignment check * improve bias addition performance of conv/deconv/fc layers * restructure code for supporting multiple targets * add DNN_TARGET_CUDA_FP64 * add DNN_TARGET_FP16 * improve vectorization * add region layer * improve tensor API, add dynamic ranks 1. use ManagedPtr instead of a Tensor in backend wrapper 2. add new methods to tensor classes - size_range: computes the combined size of for a given axis range - tensor span/view can be constructed from a raw pointer and shape 3. the tensor classes can change their rank at runtime (previously rank was fixed at compile-time) 4. remove device code from tensor classes (as they are unused) 5. enforce strict conditions on tensor class APIs to improve debugging ability * fix parametric relu activation * add squeeze/unsqueeze tensor API * add reorg layer * optimize permute and enable 2d permute * enable 1d and 2d slice * add split layer * add shuffle channel layer * allow tensors of different ranks in reshape primitive * patch SliceOp to allow Crop Layer * allow extra shape inputs in reshape layer * use `std::move_backward` instead of `std::move` for insert in resizable_static_array * improve workspace management * add spatial LRN * add nms (cpu) to region layer * add max pooling with argmax ( and a fix to limits.hpp) * add max unpooling layer * rename DNN_TARGET_CUDA_FP32 to DNN_TARGET_CUDA * update supportBackend to be more rigorous * remove stray include from preventing non-cuda build * include op_cuda.hpp outside condition #if * refactoring, fixes and many optimizations * drop DNN_TARGET_CUDA_FP64 * fix gcc errors * increase max. tensor rank limit to six * add Interp layer * drop custom layers; use BackendNode * vectorize activation kernels * fixes for gcc * remove wrong assertion * fix broken assertion in unpooling primitive * fix build errors in non-CUDA build * completely remove workspace from public API * fix permute layer * enable accuracy and perf. tests for DNN_TARGET_CUDA * add asynchronous forward * vectorize eltwise ops * vectorize fill kernel * fixes for gcc * remove CSL headers from public API * remove csl header source group from cmake * update min. cudnn version in cmake * add numerically stable FP32 log1pexp * refactor code * add FP16 specialization to cudnn based tensor addition * vectorize scale1 and bias1 + minor refactoring * fix doxygen build * fix invalid alignment assertion * clear backend wrappers before allocateLayers * ignore memory lock failures * do not allocate internal blobs * integrate NVTX * add numerically stable half precision log1pexp * fix indentation, following coding style, improve docs * remove accidental modification of IE code * Revert "add asynchronous forward" This reverts commit 1154b9da9da07e9b52f8a81bdcea48cf31c56f70. * [cmake] throw error for unsupported CC versions * fix rebase issues * add more docs, refactor code, fix bugs * minor refactoring and fixes * resolve warnings/errors from clang * remove haveCUDA() checks from supportBackend() * remove NVTX integration * changes based on review comments * avoid exception when no CUDA device is present * add color code for CUDA in Net::dump
5 years ago
endif()
endforeach()
unset(CC_LIST)
else()
set(sources_options ${sources_options} EXCLUDE_CUDA)
endif()
if(HAVE_TENGINE)
list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS})
list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive)
endif()
ocv_module_include_directories(${include_dirs})
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-array-bounds") # GCC 9.3.0 (Ubuntu 20.04)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override") # Clang
endif()
set(dnn_runtime_libs "")
if(INF_ENGINE_TARGET)
set(use_nn_builder OFF)
if(TARGET inference_engine_nn_builder OR # custom imported target
TARGET IE::inference_engine_nn_builder OR # default imported target via InferenceEngineConfig.cmake
INF_ENGINE_RELEASE VERSION_LESS "2020000000") # compatibility with older versions on IE
set(use_nn_builder ON)
endif()
ocv_option(OPENCV_DNN_IE_NN_BUILDER_2019 "Build with Inference Engine NN Builder API support" ${use_nn_builder}) # future: NOT HAVE_NGRAPH
if(OPENCV_DNN_IE_NN_BUILDER_2019)
message(STATUS "DNN: Enabling Inference Engine NN Builder API support")
add_definitions(-DHAVE_DNN_IE_NN_BUILDER_2019=1)
endif()
list(APPEND dnn_runtime_libs ${INF_ENGINE_TARGET})
endif()
if(HAVE_NGRAPH)
message(STATUS "DNN: Enabling Inference Engine nGraph API support")
add_definitions(-DHAVE_DNN_NGRAPH)
list(APPEND dnn_runtime_libs ngraph::ngraph)
endif()
ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs})
ocv_create_module(${libs} ${dnn_runtime_libs})
ocv_add_samples()
ocv_add_accuracy_tests(${dnn_runtime_libs})
set(perf_path "${CMAKE_CURRENT_LIST_DIR}/perf")
file(GLOB_RECURSE perf_srcs "${perf_path}/*.cpp")
file(GLOB_RECURSE perf_hdrs "${perf_path}/*.hpp" "${perf_path}/*.h")
ocv_add_perf_tests(${INF_ENGINE_TARGET}
FILES test_common "${CMAKE_CURRENT_LIST_DIR}/test/test_common.hpp" "${CMAKE_CURRENT_LIST_DIR}/test/test_common.impl.hpp"
FILES Src ${perf_srcs}
FILES Include ${perf_hdrs}
)
ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
ocv_option(${the_module}_PERF_CLCAFFE "Add performance tests of clCaffe framework" OFF)
if(BUILD_PERF_TESTS)
if (${the_module}_PERF_CAFFE)
find_package(Caffe QUIET)
if (Caffe_FOUND)
add_definitions(-DHAVE_CAFFE=1)
ocv_target_link_libraries(opencv_perf_dnn caffe)
endif()
elseif(${the_module}_PERF_CLCAFFE)
find_package(Caffe QUIET)
if (Caffe_FOUND)
add_definitions(-DHAVE_CLCAFFE=1)
ocv_target_link_libraries(opencv_perf_dnn caffe)
endif()
endif()
endif()