fix CUDNN_STATUS_NOT_SUPPORTED, remove redundant fusion checks

pull/16888/head
YashasSamaga 5 years ago
parent 4dfa798e75
commit 4e8cd4629c
  1. 12
      modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp
  2. 22
      modules/dnn/src/cuda4dnn/primitives/convolution.hpp
  3. 10
      modules/dnn/src/dnn.cpp

@@ -27,13 +27,21 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
/** @brief exception class for errors thrown by the cuDNN API */
class cuDNNException : public CUDAException {
public:
    using CUDAException::CUDAException;

    /** @brief constructs an exception carrying the raw cuDNN status code
     *
     * @param code  cuDNN status returned by the failing API call
     * @param msg   human-readable error message (typically cudnnGetErrorString(code))
     * @param func  name of the function where the error was detected
     * @param file  source file where the error was detected
     * @param line  line number where the error was detected
     */
    cuDNNException(cudnnStatus_t code, const std::string& msg, const std::string& func, const std::string& file, int line)
        : CUDAException(Error::GpuApiCallError, msg, func, file, line), cudnnError{code}
    {
    }

    /** @brief returns the cuDNN status code that caused this exception
     *
     * Callers use this to recover selectively, e.g. falling back to another
     * code path on CUDNN_STATUS_NOT_SUPPORTED.
     */
    cudnnStatus_t getCUDNNStatus() const noexcept { return cudnnError; }

private:
    /* default-initialized so that instances created through the inherited
     * CUDAException constructors do not expose an indeterminate value via
     * getCUDNNStatus() (previously uninitialized => UB on read)
     */
    cudnnStatus_t cudnnError{CUDNN_STATUS_SUCCESS};
};
namespace detail {
/** @brief throws cuDNNException if @p status indicates a cuDNN failure
 *
 * The status code itself is forwarded into the exception so that handlers
 * can inspect it via cuDNNException::getCUDNNStatus().
 *
 * NOTE: the rendered diff had retained both the pre- and post-commit throw
 * statements; since the `if` has no braces, the second throw executed
 * unconditionally, making check() throw even on CUDN N_STATUS_SUCCESS.
 * Only the post-commit form (which preserves the status code) is kept.
 */
inline void check(cudnnStatus_t status, const char* func, const char* file, int line) {
    if (status != CUDNN_STATUS_SUCCESS)
        throw cuDNNException(status, cudnnGetErrorString(status), func, file, line);
}
/** get_data_type<T> returns the equivalent cudnn enumeration constant for type T */

@@ -261,16 +261,32 @@ namespace cv { namespace dnn { namespace cuda4dnn {
input = transformed_input;
}
auto conv_scratchpad = allocator.get_instance();
auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
auto output = output_wrapper->getSpan();
if (fusion_location == InternalFusionLocation::CUDNN)
{
convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, allocator.get_instance());
try
{
convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, conv_scratchpad);
}
catch(const csl::cudnn::cuDNNException& ex)
{
if (ex.getCUDNNStatus() == CUDNN_STATUS_NOT_SUPPORTED)
{
/* drop cuDNN fusion and use the native fusion path */
fusion_location = InternalFusionLocation::NATIVE;
}
else
throw;
}
}
else
if (fusion_location == InternalFusionLocation::NATIVE)
{
convoluter.convolve(output, input, filtersTensor, allocator.get_instance());
convoluter.convolve(output, input, filtersTensor, conv_scratchpad);
if (!biasTensor.empty())
{
std::size_t inner_size = output.size_range(2, output.rank());

@@ -2580,16 +2580,6 @@ struct Net::Impl
nextData->type != "Power")
break;
if (IS_DNN_CUDA_TARGET(preferableTarget) &&
nextData->type != "ReLU" &&
nextData->type != "ReLU6" &&
nextData->type != "Power" &&
nextData->type != "TanH" &&
nextData->type != "Sigmoid" &&
nextData->type != "Swish" &&
nextData->type != "Mish")
break;
Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
if (nextActivLayer.empty())
break;

Loading…
Cancel
Save