From ea41f89b40392fc0c5bdfe0f0c43c4820c4b2eac Mon Sep 17 00:00:00 2001 From: Sergei Slashchinin <62052793+sl-sergei@users.noreply.github.com> Date: Fri, 22 Jan 2021 01:16:56 +0300 Subject: [PATCH] Merge pull request #19058 from sl-sergei:cuda_1d Conv1D and Pool1D for CUDA backend * CUDA-independent changes * Add Conv1D and Pool1D for CUDA backend * CUDA-independent changes * Fix typo * fix comment * Update fix * make changes more correct for pooling layer * Minor fixes for review * Split skip blocks --- modules/dnn/src/cuda/max_unpooling.cu | 25 ++++++++++++++++--- .../src/cuda4dnn/primitives/convolution.hpp | 2 +- .../src/cuda4dnn/primitives/max_unpooling.hpp | 5 ++-- modules/dnn/src/layers/convolution_layer.cpp | 22 ++++++++++++++-- modules/dnn/src/layers/pooling_layer.cpp | 24 +++++++++++++----- modules/dnn/test/test_onnx_importer.cpp | 13 +++++++++- 6 files changed, 74 insertions(+), 17 deletions(-) diff --git a/modules/dnn/src/cuda/max_unpooling.cu b/modules/dnn/src/cuda/max_unpooling.cu index fbfb5ae432..3bfd75f926 100644 --- a/modules/dnn/src/cuda/max_unpooling.cu +++ b/modules/dnn/src/cuda/max_unpooling.cu @@ -31,7 +31,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { namespace raw { template ::type = true> /* Order has been hardcoded; see code */ + typename std::enable_if::type = true> /* Order has been hardcoded; see code */ __global__ void max_pooling_with_indices( Span output, Span indices, View input, size_type channels, array out_spatial_dims, array in_spatial_dims, @@ -72,7 +72,22 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { in_spatial_size *= in_spatial_dims[i]; const auto outer_offset = (n * channels + c) * in_spatial_size; - if (Order == 2) { + if (Order == 1) { + array idx; + for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) { + index_type offset = 0; + index_type stride = 1; + for (int i = Order - 1; i >= 0; i--) { + offset += stride * idx[i]; + stride *= in_spatial_dims[i]; + } + + if (input[outer_offset + offset] > max_value) { + max_idx = offset; + max_value = input[outer_offset + offset]; + } + } + } else if (Order == 2) { array idx; for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) { for (idx[1] = start[1]; idx[1] != end[1]; idx[1]++) { @@ -206,8 +221,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { out_spatial_dims[i] = output.get_axis_size(2 + i); } - /* only max_pooling2d and max_pooling3d are supported */ - CV_Assert(2 <= order && order <= 3); + CV_Assert(1 <= order && order <= 3); std::size_t channels = input.get_axis_size(1); if (order == 3) { launch_max_pooling_kernel(stream, output, indices, input, channels, @@ -215,6 +229,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { } else if (order == 2) { launch_max_pooling_kernel(stream, output, indices, input, channels, out_spatial_dims, in_spatial_dims, window_size, strides, padding_left); + } else if (order == 1) { + launch_max_pooling_kernel(stream, output, indices, input, channels, + out_spatial_dims, in_spatial_dims, window_size, strides, padding_left); } } diff --git a/modules/dnn/src/cuda4dnn/primitives/convolution.hpp b/modules/dnn/src/cuda4dnn/primitives/convolution.hpp index 8d788f05dc..12cf97404e 100644 --- a/modules/dnn/src/cuda4dnn/primitives/convolution.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/convolution.hpp @@ -103,7 +103,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { const auto groups = config.groups; - CV_Assert (1 < convolution_order && convolution_order <= 3); + CV_Assert (1 <= convolution_order && convolution_order <= 3); const auto rank = input_shape.size(); const auto output_feature_maps = output_shape[1]; diff --git a/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp b/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp index 1102dc56fa..fc1002fc4e 100644 --- a/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp @@ -50,13 +50,12 @@ namespace cv { namespace dnn { namespace cuda4dnn { window_size = config.window_size; const auto pooling_order = window_size.size(); - CV_Assert(pooling_order >= 1); strides = config.strides; CV_Assert(pooling_order == strides.size()); - if (pooling_order != 2 && pooling_order != 3) - CV_Error(Error::StsNotImplemented, "Only 2D/3D max-pooling are supported."); + if (pooling_order < 1 || pooling_order > 3) + CV_Error(Error::StsNotImplemented, "Only 1D/2D/3D max-pooling are supported."); padding_left.resize(pooling_order); if (config.padMode == MaxPoolingConfiguration::PaddingMode::MANUAL) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index a30be5e7c2..7ef20f07a2 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -125,6 +125,9 @@ public: { kernel_size.assign(1, kernel_size[0]); strides.assign(1, strides[0]); + dilations.assign(1, dilations[0]); + pads_begin.assign(1, pads_begin[0]); + pads_end.assign(1, pads_end[0]); } CV_Assert(weightShape.dims() == kernel_size.size() + 2); for (int i = 0; i < kernel_size.size(); i++) { @@ -311,8 +314,8 @@ public: #ifdef HAVE_CUDA if (backendId == DNN_BACKEND_CUDA) { - /* only convolution 2d and 3d supported */ - if (ksize == 2 || ksize == 3) + /* only 1d, 2d and 3d convolutions supported */ + if (ksize > 0 && ksize <= 3) return true; return false; @@ -2001,6 +2004,21 @@ public: const auto groups = input_feature_maps / input_feature_maps_per_group; ConvolutionConfiguration config; + + if (input_shape.size() == 3) + { + // Conv1D + // We add an extra dim for input and output tensors, because CuDNN doesn't support convolution with 3D tensors + input_shape.insert(std::end(input_shape) - 1, 1); + output_shape.insert(std::end(output_shape) - 1, 1); + + // Do the similar thing for the other parameters + pads_begin.insert(std::begin(pads_begin), 0); + pads_end.insert(std::begin(pads_end), 0); + strides.insert(std::begin(strides), 1); + dilations.insert(std::begin(dilations), 1); + kernel_size.insert(std::begin(kernel_size), 1); + } config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size)); config.dilations.assign(std::begin(dilations), std::end(dilations)); config.strides.assign(std::begin(strides), std::end(strides)); diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index e2c22181de..ac78c5eeab 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -178,14 +178,13 @@ public: if (inputs[0].dims == 3) { - //Pool1D - kernel_size.erase(kernel_size.begin() + 1); - strides.erase(strides.begin() + 1); - pads_begin.erase(pads_begin.begin() + 1); - pads_end.erase(pads_end.begin() + 1); + // Pool1D + kernel_size.assign(1, kernel_size[0]); + strides.assign(1, strides[0]); + pads_begin.assign(1, pads_begin[0]); + pads_end.assign(1, pads_end[0]); } - #ifdef HAVE_OPENCL poolOp.release(); #endif @@ -392,6 +391,19 @@ public: return make_cuda_node(preferableTarget, std::move(context->stream), config); } + if (input_shape.size() == 3) + { + // Pool1D + // We add an extra dim for input tensor, because CuDNN support pooling only with 2 and 3 spatial dimensions + input_shape.insert(std::end(input_shape) - 1, 1); + + // Do the similar thing for the other parameters + pads_begin.insert(std::begin(pads_begin), 0); + pads_end.insert(std::begin(pads_end), 0); + strides.insert(std::begin(strides), 1); + kernel_size.insert(std::begin(kernel_size), 1); + } + PoolingConfiguration config; if (type == MAX) { diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index dcb8678cad..a9d781dfee 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -122,7 +122,8 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight) if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported - + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported String basename = "conv_variable_w"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); ASSERT_FALSE(net.empty()); @@ -152,6 +153,8 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias) if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported String basename = "conv_variable_wb"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); @@ -715,6 +718,10 @@ TEST_P(Test_ONNX_layers, Conv1d_bias) TEST_P(Test_ONNX_layers, Conv1d_variable_weight) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported String basename = "conv1d_variable_w"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); ASSERT_FALSE(net.empty()); @@ -735,6 +742,10 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight) TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);