Open Source Computer Vision Library https://opencv.org/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

337 lines
12 KiB

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_CUDNN_HPP
#define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_CUDNN_HPP
#include "../pointer.hpp"
#include <cudnn.h>
#include <cstddef>
#include <array>
#include <algorithm>
#include <functional>
#include <numeric>
#include <vector>
#include <type_traits>
#include <iterator>
/** @brief checks the status produced by a cuDNN API call
 *
 * Evaluates \p call once and forwards its result, together with CV_Func (as the
 * function name argument), __FILE__ and __LINE__, to detail::check. detail::check
 * throws a cuDNNException when the status is not CUDNN_STATUS_SUCCESS.
 */
#define CUDA4DNN_CHECK_CUDNN(call) \
::cv::dnn::cuda4dnn::csl::cudnn::detail::check((call), CV_Func, __FILE__, __LINE__)
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
/** @brief exception type raised when a cuDNN API call reports a failure
 *
 * Extends CUDAException (always constructed with Error::GpuApiCallError) with the
 * cudnnStatus_t value that triggered the error.
 */
class cuDNNException : public CUDAException {
public:
    /** constructs the exception
     *
     * @param code  status reported by the cuDNN call
     * @param msg   human readable description of the error
     * @param func  name of the function in which the error was detected
     * @param file  source file in which the error was detected
     * @param line  line number at which the error was detected
     */
    cuDNNException(cudnnStatus_t code, const std::string& msg, const std::string& func, const std::string& file, int line)
        : CUDAException(Error::GpuApiCallError, msg, func, file, line)
        , cudnnError(code)
    { }

    /** returns the cuDNN status stored at construction */
    cudnnStatus_t getCUDNNStatus() const noexcept { return cudnnError; }

private:
    cudnnStatus_t cudnnError;
};
namespace detail {
    /** throws a cuDNNException unless \p status is CUDNN_STATUS_SUCCESS
     *
     * The exception message is the string returned by cudnnGetErrorString for \p status;
     * \p func, \p file and \p line are forwarded to the exception unchanged.
     */
    inline void check(cudnnStatus_t status, const char* func, const char* file, int line) {
        if (status == CUDNN_STATUS_SUCCESS)
            return;

        throw cuDNNException(status, cudnnGetErrorString(status), func, file, line);
    }

    /** type of the cuDNN data type enumeration constants (deduced from CUDNN_DATA_FLOAT) */
    using cudnn_data_enum_type = decltype(CUDNN_DATA_FLOAT);

    /** get_data_type<T> returns the equivalent cudnn enumeration constant for type T
     *
     * Only the specializations below are defined; the primary template is declared but
     * has no definition.
     */
    template <class> cudnn_data_enum_type get_data_type();

    template <> inline cudnn_data_enum_type get_data_type<float>() { return CUDNN_DATA_FLOAT; }
    template <> inline cudnn_data_enum_type get_data_type<half>() { return CUDNN_DATA_HALF; }
}
/** @brief noncopyable cuDNN smart handle
*
* UniqueHandle is a smart non-sharable wrapper for cuDNN handle which ensures that the handle
* is destroyed after use.
*/
class UniqueHandle {
public:
UniqueHandle() noexcept : handle{ nullptr } { }
UniqueHandle(UniqueHandle&) = delete;
UniqueHandle(UniqueHandle&& other) noexcept {
stream = std::move(other.stream);
handle = other.handle;
other.handle = nullptr;
}
/** creates a cuDNN handle and associates it with the stream specified
*
* Exception Guarantee: Basic
*/
UniqueHandle(Stream strm) : stream(std::move(strm)) {
CV_Assert(stream);
CUDA4DNN_CHECK_CUDNN(cudnnCreate(&handle));
try {
CUDA4DNN_CHECK_CUDNN(cudnnSetStream(handle, stream.get()));
} catch (...) {
/* cudnnDestroy won't throw if a valid handle is passed */
CUDA4DNN_CHECK_CUDNN(cudnnDestroy(handle));
throw;
}
}
~UniqueHandle() noexcept {
if (handle != nullptr) {
/* cudnnDestroy won't throw if a valid handle is passed */
CUDA4DNN_CHECK_CUDNN(cudnnDestroy(handle));
}
}
UniqueHandle& operator=(const UniqueHandle&) = delete;
UniqueHandle& operator=(UniqueHandle&& other) noexcept {
CV_Assert(other);
if (&other != this) {
UniqueHandle(std::move(*this)); /* destroy current handle */
stream = std::move(other.stream);
handle = other.handle;
other.handle = nullptr;
}
return *this;
}
/** returns the raw cuDNN handle */
cudnnHandle_t get() const noexcept {
CV_Assert(handle);
return handle;
}
/** returns true if the handle is valid */
explicit operator bool() const noexcept { return static_cast<bool>(handle); }
private:
Stream stream;
cudnnHandle_t handle;
};
/** @brief sharable cuDNN smart handle
*
* Handle is a smart sharable wrapper for cuDNN handle which ensures that the handle
* is destroyed after all references to the handle are destroyed. The handle must always
* be associated with a non-default stream. The stream must be specified during construction.
*
* @note Moving a Handle object to another invalidates the former
*/
class Handle {
public:
Handle() = default;
Handle(const Handle&) = default;
Handle(Handle&&) = default;
/** creates a cuDNN handle and associates it with the stream specified
*
* Exception Guarantee: Basic
*/
Handle(Stream strm) : handle(std::make_shared<UniqueHandle>(std::move(strm))) { }
Handle& operator=(const Handle&) = default;
Handle& operator=(Handle&&) = default;
/** returns true if the handle is valid */
explicit operator bool() const noexcept { return static_cast<bool>(handle); }
/** returns the raw cuDNN handle */
cudnnHandle_t get() const noexcept {
CV_Assert(handle);
return handle->get();
}
private:
std::shared_ptr<UniqueHandle> handle;
};
/** describe a tensor
*
* @tparam T type of elements in the tensor
*/
template <class T>
class TensorDescriptor {
public:
TensorDescriptor() noexcept : descriptor{ nullptr } { }
TensorDescriptor(const TensorDescriptor&) = delete;
TensorDescriptor(TensorDescriptor&& other) noexcept
: descriptor{ other.descriptor } {
other.descriptor = nullptr;
}
/** constructs a tensor descriptor from the axis lengths provided in \p shape
*
* Exception Guarantee: Basic
*/
template <class SequenceContainer, typename = decltype(std::begin(std::declval<SequenceContainer>()))>
TensorDescriptor(const SequenceContainer& shape) {
constructor(shape.begin(), shape.end());
}
/** constructs a tensor descriptor from the axis lengths provided in [begin, end)
*
* Exception Guarantee: Basic
*/
template <class ForwardItr, typename = typename std::enable_if<!std::is_integral<ForwardItr>::value, void>::type> // TODO is_iterator
TensorDescriptor(ForwardItr begin, ForwardItr end) {
constructor(begin, end);
}
/** constructs a tensor descriptor from the axis lengths provided as arguments
*
* Exception Guarantee: Basic
*/
template <class ...Sizes>
TensorDescriptor(Sizes ...sizes) {
static_assert(sizeof...(Sizes) <= CUDNN_DIM_MAX, "required rank exceeds maximum supported rank");
std::array<int, sizeof...(Sizes)> dims = { static_cast<int>(sizes)... };
constructor(std::begin(dims), std::end(dims));
}
~TensorDescriptor() noexcept {
if (descriptor != nullptr) {
/* cudnnDestroyTensorDescriptor will not fail */
CUDA4DNN_CHECK_CUDNN(cudnnDestroyTensorDescriptor(descriptor));
}
}
TensorDescriptor& operator=(const TensorDescriptor&) = delete;
TensorDescriptor& operator=(TensorDescriptor&& other) noexcept {
descriptor = other.descriptor;
other.descriptor = nullptr;
return *this;
};
cudnnTensorDescriptor_t get() const noexcept { return descriptor; }
private:
template <class ForwardItr>
void constructor(ForwardItr start, ForwardItr end) {
CV_Assert(start != end);
CV_Assert(std::distance(start, end) <= CUDNN_DIM_MAX);
CUDA4DNN_CHECK_CUDNN(cudnnCreateTensorDescriptor(&descriptor));
try {
/* cuDNN documentation recommends using the 4d tensor API whenever possible
* hence, we create a 4d tensor descriptors for 3d tensor
*/
const auto rank = std::distance(start, end);
if (rank <= 4) {
std::array<int, 4> dims;
std::fill(std::begin(dims), std::end(dims), 1);
/* suppose we have a 3d tensor, the first axis is the batch axis and
* the second axis is the channel axis (generally)
*
* cuDNN frequently assumes that the first axis is the batch axis and the
* second axis is the channel axis; hence, we copy the shape of a lower rank
* tensor to the beginning of `dims`
*/
std::copy(start, end, std::begin(dims));
CUDA4DNN_CHECK_CUDNN(
cudnnSetTensor4dDescriptor(descriptor,
CUDNN_TENSOR_NCHW, detail::get_data_type<T>(),
dims[0], dims[1], dims[2], dims[3]
)
);
} else {
std::vector<int> stride(rank);
stride.back() = 1;
/* WHAT WE HAVE NOW:
* stride[-1] = 1
* stride[-2] = garbage
* stride[-3] = garbage
* stride[-4] = garbage
* ...
*/
std::copy(start + 1, end, stride.begin());
/* WHAT WE HAVE NOW:
* stride[-1] = 1
* stride[-2] = dim[-1]
* stride[-3] = dim[-2]
* stride[-4] = dim[-3]
* ...
*/
std::partial_sum(stride.rbegin(), stride.rend(), stride.rbegin(), std::multiplies<int>());
/* WHAT WE HAVE NOW:
* stride[-1] = 1
* stride[-2] = stride[-1] * dim[-1]
* stride[-3] = stride[-2] * dim[-2]
* stride[-4] = stride[-3] * dim[-3]
* ...
*/
std::vector<int> dims(start, end);
CUDA4DNN_CHECK_CUDNN(
cudnnSetTensorNdDescriptor(descriptor,
detail::get_data_type<T>(), rank,
dims.data(), stride.data()
)
);
}
} catch (...) {
/* cudnnDestroyTensorDescriptor will not fail */
CUDA4DNN_CHECK_CUDNN(cudnnDestroyTensorDescriptor(descriptor));
throw;
}
}
cudnnTensorDescriptor_t descriptor;
};
/** An array of fully packed tensor descriptors
 *
 * Owns a sequence of TensorDescriptor<T> objects which all describe the same shape and
 * exposes their raw cuDNN descriptors as a contiguous list. The class is move-only.
 *
 * @tparam T type of elements in the tensor
 */
template<class T>
class TensorDescriptorsArray
{
public:
    /** constructs an empty array */
    TensorDescriptorsArray() noexcept = default;

    TensorDescriptorsArray(const TensorDescriptorsArray&) = delete;

    /** takes over the descriptors owned by \p other
     *
     * Parentheses are used on purpose: TensorDescriptor is implicitly constructible from
     * any container, so brace-initialization could be treated as a one-element
     * initializer list instead of a move of the vector.
     */
    TensorDescriptorsArray(TensorDescriptorsArray&& other) noexcept
        : descriptors(std::move(other.descriptors)) {}

    /** constructs \p seqLength tensor descriptors, each with the shape given by \p dims
     *
     * No descriptors are created if \p seqLength is not positive.
     */
    TensorDescriptorsArray(int seqLength, std::array<int, 3> dims)
    {
        /* allocate once up front instead of growing the vector while filling it */
        if (seqLength > 0)
            descriptors.reserve(static_cast<std::size_t>(seqLength));

        for (int i = 0; i < seqLength; ++i)
        {
            descriptors.emplace_back(dims);
        }
    }

    ~TensorDescriptorsArray() noexcept = default;

    TensorDescriptorsArray& operator=(const TensorDescriptorsArray&) = delete;

    /** replaces the owned descriptors with the ones owned by \p other */
    TensorDescriptorsArray& operator=(TensorDescriptorsArray&& other) noexcept
    {
        descriptors = std::move(other.descriptors);
        return *this;
    }

    /** returns the raw cuDNN descriptors in the order in which they were created
     *
     * The returned vector is a fresh copy of the raw descriptor values; the descriptors
     * themselves remain owned by this object.
     */
    std::vector<cudnnTensorDescriptor_t> get() const noexcept
    {
        std::vector<cudnnTensorDescriptor_t> descPtrs;
        descPtrs.reserve(descriptors.size());
        for (const auto& desc : descriptors)
        {
            descPtrs.push_back(desc.get());
        }
        return descPtrs;
    }

private:
    std::vector<TensorDescriptor<T>> descriptors;
};
}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */
#endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_HPP */