Merge pull request #19058 from sl-sergei:cuda_1d

Conv1D and Pool1D for CUDA backend

* CUDA-independent changes

* Add Conv1D and Pool1D for CUDA backend

* CUDA-independent changes

* Fix typo

* fix comment

* Update fix

* make changes more correct for pooling layer

* Minor fixes for review

* Split skip blocks
pull/19378/head
Sergei Slashchinin 4 years ago committed by GitHub
parent 7a790d0d35
commit ea41f89b40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 25
      modules/dnn/src/cuda/max_unpooling.cu
  2. 2
      modules/dnn/src/cuda4dnn/primitives/convolution.hpp
  3. 5
      modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp
  4. 22
      modules/dnn/src/layers/convolution_layer.cpp
  5. 22
      modules/dnn/src/layers/pooling_layer.cpp
  6. 13
      modules/dnn/test/test_onnx_importer.cpp

@ -31,7 +31,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
namespace raw { namespace raw {
template <class T, std::size_t Order, template <class T, std::size_t Order,
typename std::enable_if<Order == 2 || Order == 3, bool>::type = true> /* Order has been hardcoded; see code */ typename std::enable_if<Order == 1 || Order == 2 || Order == 3, bool>::type = true> /* Order has been hardcoded; see code */
__global__ void max_pooling_with_indices( __global__ void max_pooling_with_indices(
Span<T> output, Span<T> indices, View<T> input, size_type channels, Span<T> output, Span<T> indices, View<T> input, size_type channels,
array<size_type, Order> out_spatial_dims, array<size_type, Order> in_spatial_dims, array<size_type, Order> out_spatial_dims, array<size_type, Order> in_spatial_dims,
@ -72,7 +72,22 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
in_spatial_size *= in_spatial_dims[i]; in_spatial_size *= in_spatial_dims[i];
const auto outer_offset = (n * channels + c) * in_spatial_size; const auto outer_offset = (n * channels + c) * in_spatial_size;
if (Order == 2) { if (Order == 1) {
array<index_type, Order> idx;
for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) {
index_type offset = 0;
index_type stride = 1;
for (int i = Order - 1; i >= 0; i--) {
offset += stride * idx[i];
stride *= in_spatial_dims[i];
}
if (input[outer_offset + offset] > max_value) {
max_idx = offset;
max_value = input[outer_offset + offset];
}
}
} else if (Order == 2) {
array<index_type, Order> idx; array<index_type, Order> idx;
for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) { for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) {
for (idx[1] = start[1]; idx[1] != end[1]; idx[1]++) { for (idx[1] = start[1]; idx[1] != end[1]; idx[1]++) {
@ -206,8 +221,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
out_spatial_dims[i] = output.get_axis_size(2 + i); out_spatial_dims[i] = output.get_axis_size(2 + i);
} }
/* only max_pooling2d and max_pooling3d are supported */ CV_Assert(1 <= order && order <= 3);
CV_Assert(2 <= order && order <= 3);
std::size_t channels = input.get_axis_size(1); std::size_t channels = input.get_axis_size(1);
if (order == 3) { if (order == 3) {
launch_max_pooling_kernel<T, 3>(stream, output, indices, input, channels, launch_max_pooling_kernel<T, 3>(stream, output, indices, input, channels,
@ -215,6 +229,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
} else if (order == 2) { } else if (order == 2) {
launch_max_pooling_kernel<T, 2>(stream, output, indices, input, channels, launch_max_pooling_kernel<T, 2>(stream, output, indices, input, channels,
out_spatial_dims, in_spatial_dims, window_size, strides, padding_left); out_spatial_dims, in_spatial_dims, window_size, strides, padding_left);
} else if (order == 1) {
launch_max_pooling_kernel<T, 1>(stream, output, indices, input, channels,
out_spatial_dims, in_spatial_dims, window_size, strides, padding_left);
} }
} }

@ -103,7 +103,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
const auto groups = config.groups; const auto groups = config.groups;
CV_Assert (1 < convolution_order && convolution_order <= 3); CV_Assert (1 <= convolution_order && convolution_order <= 3);
const auto rank = input_shape.size(); const auto rank = input_shape.size();
const auto output_feature_maps = output_shape[1]; const auto output_feature_maps = output_shape[1];

@ -50,13 +50,12 @@ namespace cv { namespace dnn { namespace cuda4dnn {
window_size = config.window_size; window_size = config.window_size;
const auto pooling_order = window_size.size(); const auto pooling_order = window_size.size();
CV_Assert(pooling_order >= 1);
strides = config.strides; strides = config.strides;
CV_Assert(pooling_order == strides.size()); CV_Assert(pooling_order == strides.size());
if (pooling_order != 2 && pooling_order != 3) if (pooling_order < 1 || pooling_order > 3)
CV_Error(Error::StsNotImplemented, "Only 2D/3D max-pooling are supported."); CV_Error(Error::StsNotImplemented, "Only 1D/2D/3D max-pooling are supported.");
padding_left.resize(pooling_order); padding_left.resize(pooling_order);
if (config.padMode == MaxPoolingConfiguration::PaddingMode::MANUAL) if (config.padMode == MaxPoolingConfiguration::PaddingMode::MANUAL)

@ -125,6 +125,9 @@ public:
{ {
kernel_size.assign(1, kernel_size[0]); kernel_size.assign(1, kernel_size[0]);
strides.assign(1, strides[0]); strides.assign(1, strides[0]);
dilations.assign(1, dilations[0]);
pads_begin.assign(1, pads_begin[0]);
pads_end.assign(1, pads_end[0]);
} }
CV_Assert(weightShape.dims() == kernel_size.size() + 2); CV_Assert(weightShape.dims() == kernel_size.size() + 2);
for (int i = 0; i < kernel_size.size(); i++) { for (int i = 0; i < kernel_size.size(); i++) {
@ -311,8 +314,8 @@ public:
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
if (backendId == DNN_BACKEND_CUDA) if (backendId == DNN_BACKEND_CUDA)
{ {
/* only convolution 2d and 3d supported */ /* only 1d, 2d and 3d convolutions supported */
if (ksize == 2 || ksize == 3) if (ksize > 0 && ksize <= 3)
return true; return true;
return false; return false;
@ -2001,6 +2004,21 @@ public:
const auto groups = input_feature_maps / input_feature_maps_per_group; const auto groups = input_feature_maps / input_feature_maps_per_group;
ConvolutionConfiguration config; ConvolutionConfiguration config;
if (input_shape.size() == 3)
{
// Conv1D
// We add an extra dim for input and output tensors, because CuDNN doesn't support convolution with 3D tensors
input_shape.insert(std::end(input_shape) - 1, 1);
output_shape.insert(std::end(output_shape) - 1, 1);
// Do the similar thing for the other parameters
pads_begin.insert(std::begin(pads_begin), 0);
pads_end.insert(std::begin(pads_end), 0);
strides.insert(std::begin(strides), 1);
dilations.insert(std::begin(dilations), 1);
kernel_size.insert(std::begin(kernel_size), 1);
}
config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size)); config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size));
config.dilations.assign(std::begin(dilations), std::end(dilations)); config.dilations.assign(std::begin(dilations), std::end(dilations));
config.strides.assign(std::begin(strides), std::end(strides)); config.strides.assign(std::begin(strides), std::end(strides));

@ -179,13 +179,12 @@ public:
if (inputs[0].dims == 3) if (inputs[0].dims == 3)
{ {
// Pool1D // Pool1D
kernel_size.erase(kernel_size.begin() + 1); kernel_size.assign(1, kernel_size[0]);
strides.erase(strides.begin() + 1); strides.assign(1, strides[0]);
pads_begin.erase(pads_begin.begin() + 1); pads_begin.assign(1, pads_begin[0]);
pads_end.erase(pads_end.begin() + 1); pads_end.assign(1, pads_end[0]);
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
poolOp.release(); poolOp.release();
#endif #endif
@ -392,6 +391,19 @@ public:
return make_cuda_node<cuda4dnn::MaxPoolingOp>(preferableTarget, std::move(context->stream), config); return make_cuda_node<cuda4dnn::MaxPoolingOp>(preferableTarget, std::move(context->stream), config);
} }
if (input_shape.size() == 3)
{
// Pool1D
// We add an extra dim for input tensor, because CuDNN support pooling only with 2 and 3 spatial dimensions
input_shape.insert(std::end(input_shape) - 1, 1);
// Do the similar thing for the other parameters
pads_begin.insert(std::begin(pads_begin), 0);
pads_end.insert(std::begin(pads_end), 0);
strides.insert(std::begin(strides), 1);
kernel_size.insert(std::begin(kernel_size), 1);
}
PoolingConfiguration config; PoolingConfiguration config;
if (type == MAX) if (type == MAX)
{ {

@ -122,7 +122,8 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight)
if (backend == DNN_BACKEND_CUDA) if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
if (backend == DNN_BACKEND_VKCOM)
applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
String basename = "conv_variable_w"; String basename = "conv_variable_w";
Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
ASSERT_FALSE(net.empty()); ASSERT_FALSE(net.empty());
@ -152,6 +153,8 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
if (backend == DNN_BACKEND_CUDA) if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
if (backend == DNN_BACKEND_VKCOM)
applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
String basename = "conv_variable_wb"; String basename = "conv_variable_wb";
Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
@ -715,6 +718,10 @@ TEST_P(Test_ONNX_layers, Conv1d_bias)
TEST_P(Test_ONNX_layers, Conv1d_variable_weight) TEST_P(Test_ONNX_layers, Conv1d_variable_weight)
{ {
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
if (backend == DNN_BACKEND_VKCOM)
applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
String basename = "conv1d_variable_w"; String basename = "conv1d_variable_w";
Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
ASSERT_FALSE(net.empty()); ASSERT_FALSE(net.empty());
@ -735,6 +742,10 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight)
TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias) TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias)
{ {
if (backend == DNN_BACKEND_CUDA)
applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported
if (backend == DNN_BACKEND_VKCOM)
applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{ {
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);

Loading…
Cancel
Save