diff --git a/modules/dnn/src/cuda4dnn/primitives/scale_shift.hpp b/modules/dnn/src/cuda4dnn/primitives/scale_shift.hpp
index 399cce0fcb..9da7ec3326 100644
--- a/modules/dnn/src/cuda4dnn/primitives/scale_shift.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/scale_shift.hpp
@@ -19,25 +19,51 @@ namespace cv { namespace dnn { namespace cuda4dnn {
 
+    struct ScaleShiftConfiguration {
+        enum class OpMode {
+            NONE,
+            TRAINABLE, /* use a pretrained blob */
+            UNTRAINABLE /* use another input */
+        };
+
+        OpMode scaleMode;
+        OpMode shiftMode;
+
+        std::size_t axis;
+    };
+
     template <class T>
     class ScaleShiftOp final : public CUDABackendNode {
     public:
         using wrapper_type = GetCUDABackendWrapperType<T>;
 
-        ScaleShiftOp(csl::Stream stream_, std::size_t axis, const cv::Mat& weights, const cv::Mat& bias)
-            : stream(std::move(stream_)), axis{ axis }
+        ScaleShiftOp(csl::Stream stream_, const ScaleShiftConfiguration& config, const cv::Mat& weights, const cv::Mat& bias)
+            : stream(std::move(stream_)), axis{ config.axis }
         {
-            if (!weights.empty())
+            scaleMode = config.scaleMode;
+            if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
+                CV_Assert(!weights.empty());
                 weightsTensor = csl::makeTensorHeader<T>(weights);
                 csl::copyMatToTensor<T>(weights, weightsTensor, stream);
             }
 
-            if (!bias.empty())
+            shiftMode = config.shiftMode;
+            if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
+                CV_Assert(!bias.empty());
                 biasTensor = csl::makeTensorHeader<T>(bias);
                 csl::copyMatToTensor<T>(bias, biasTensor, stream);
             }
+
+            CV_Assert(scaleMode != ScaleShiftConfiguration::OpMode::NONE ||
+                      shiftMode != ScaleShiftConfiguration::OpMode::NONE);
+
+            if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE &&
+                shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
+            {
+                CV_Error(cv::Error::StsNotImplemented, "scale and shift both in untrainable mode is not supported");
+            }
         }
 
         void forward(
@@ -53,40 +79,60 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
             auto output = output_wrapper->getSpan();
 
+            /* number of batches in the weights/bias
+             * trainable mode: same for all batches
+             * untrainable mode: could be different for different batch samples
+             */
+            std::size_t parameter_batch_size = 1;
+
             csl::TensorView<T> weights;
-            if (weightsTensor.empty() && biasTensor.empty())
+            if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
+            {
+                CV_Assert(!weightsTensor.empty());
+                weights = csl::TensorView<T>(weightsTensor);
+            }
+            else if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
             {
                 CV_Assert(inputs.size() == 2);
-
-                /* no explicit scale/shift values provided; use the second input as weights */
                 auto wrapper = inputs[1].dynamicCast<wrapper_type>();
                 weights = wrapper->getView();
+
+                parameter_batch_size = weights.get_axis_size(0);
+                CV_Assert(parameter_batch_size == input.get_axis_size(0));
             }
-            else if (!weightsTensor.empty())
+
+            csl::TensorView<T> bias;
+            if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
-                weights = csl::TensorSpan<T>(weightsTensor);
+                CV_Assert(!biasTensor.empty());
+                bias = csl::TensorView<T>(biasTensor);
             }
+            else if (shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
+            {
+                CV_Assert(inputs.size() == 2);
+                auto wrapper = inputs[1].dynamicCast<wrapper_type>();
+                bias = wrapper->getView();
 
-            csl::TensorView<T> bias;
-            if (!biasTensor.empty())
-                bias = csl::TensorSpan<T>(biasTensor);
+                parameter_batch_size = bias.get_axis_size(0);
+                CV_Assert(parameter_batch_size == input.get_axis_size(0));
+            }
 
-            const auto numParams = !weights.empty() ? weights.size() : bias.size();
-            CV_Assert(numParams != 0);
-            if (!weightsTensor.empty() && !biasTensor.empty())
+            CV_Assert(!weights.empty() || !bias.empty());
+            if (!weights.empty() && !bias.empty())
             {
-                CV_CheckEQ(weights.size(), bias.size(), "weights and bias size are not equal");
+                CV_CheckEQ(weights.size(), bias.size(), "different broadcasting options for weights and bias is not supported");
            }
 
-            /* the weights/bias might require broadcasting to scale/shift */
+            const auto num_parameters = !weights.empty() ? weights.size() : bias.size();
+            const auto mid_size = num_parameters / parameter_batch_size;
+
+            /* the scale shift operation might require broadcasting */
             const int end_axis = [&] {
-                for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++)
-                {
-                    std::size_t size = input.size_range(axis, endAxis);
-                    if (size == numParams)
+                for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++) {
+                    if (input.size_range(axis, endAxis) == mid_size)
                         return endAxis;
                 }
-                CV_Assert(0 /* invalid weights matrix */);
+                CV_Assert(0 /* failed to find a broadcast config */);
             }();
 
             std::size_t inner_size = input.size_range(end_axis, input.rank());
@@ -103,6 +149,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         csl::Stream stream;
         csl::Tensor<T> weightsTensor, biasTensor;
         std::size_t axis;
+
+        ScaleShiftConfiguration::OpMode scaleMode, shiftMode;
     };
 
 }}} /* namespace cv::dnn::cuda4dnn */
diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp
index ee8d6cc379..30fdb47595 100644
--- a/modules/dnn/src/layers/scale_layer.cpp
+++ b/modules/dnn/src/layers/scale_layer.cpp
@@ -159,14 +159,49 @@ public:
 
         CV_Assert(!blobs.empty() || inputs.size() == 2);
 
-        cv::Mat weightsMat = hasWeights ? blobs[0] : Mat();
+        auto weightsMat = Mat(), biasMat = Mat();
 
-        /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
-         * in either case, it is at the end of the blobs vector => bias = blobs.back()
-         */
-        cv::Mat biasMat = hasBias ? blobs.back() : Mat();
+        cuda4dnn::ScaleShiftConfiguration config;
+        if (hasWeights)
+        {
+            if (blobs.empty())
+            {
+                config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
+            }
+            else
+            {
+                weightsMat = blobs[0];
+                config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
+            }
+        }
+        else
+        {
+            config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
+        }
+
+        if (hasBias)
+        {
+            if(blobs.empty())
+            {
+                config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
+            }
+            else
+            {
+                /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
+                 * in either case, it is at the end of the blobs vector => bias = blobs.back()
+                 */
+                biasMat = blobs.back();
+                config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
+            }
+        }
+        else
+        {
+            config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
+        }
+
+        config.axis = axis;
 
-        return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), axis, weightsMat, biasMat);
+        return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), config, weightsMat, biasMat);
     }
 #endif
 
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index 15619c7f53..052027e210 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -580,8 +580,8 @@ TEST_P(Test_Darknet_layers, convolutional)
 
 TEST_P(Test_Darknet_layers, scale_channels)
 {
-    // TODO: test fails for batches due to a bug/missing feature in ScaleLayer
-    testDarknetLayer("scale_channels", false, false);
+    bool testBatches = backend == DNN_BACKEND_CUDA;
+    testDarknetLayer("scale_channels", false, testBatches);
 }
 
 TEST_P(Test_Darknet_layers, connected)
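
Note on the layer-side selection in scale_layer.cpp: the mode chosen for each of scale and shift depends only on hasWeights/hasBias and on whether the layer owns pretrained blobs, and the ScaleShiftOp constructor then rejects the two combinations that cannot work (both NONE, both UNTRAINABLE). Below is a minimal standalone sketch of that decision logic; makeConfig, the blobsEmpty flag, and the trimmed-down ScaleShiftConfiguration mirror are hypothetical illustrations rather than code from the patch, and std::logic_error stands in for CV_Error/CV_Assert.

#include <cstddef>
#include <stdexcept>

/* trimmed-down mirror of the patch's ScaleShiftConfiguration, for illustration */
struct ScaleShiftConfiguration {
    enum class OpMode { NONE, TRAINABLE, UNTRAINABLE };
    OpMode scaleMode, shiftMode;
    std::size_t axis;
};

/* hypothetical helper condensing the selection logic in ScaleLayerImpl:
 * NONE        => the operation is disabled
 * TRAINABLE   => values come from a pretrained blob owned by the layer
 * UNTRAINABLE => values come from the second input (inputs[1])
 */
ScaleShiftConfiguration makeConfig(bool hasWeights, bool hasBias, bool blobsEmpty, std::size_t axis)
{
    using OpMode = ScaleShiftConfiguration::OpMode;
    ScaleShiftConfiguration config;
    config.scaleMode = !hasWeights ? OpMode::NONE
                     : blobsEmpty  ? OpMode::UNTRAINABLE : OpMode::TRAINABLE;
    config.shiftMode = !hasBias    ? OpMode::NONE
                     : blobsEmpty  ? OpMode::UNTRAINABLE : OpMode::TRAINABLE;
    config.axis = axis;

    /* the ScaleShiftOp constructor enforces the same two invariants */
    if (config.scaleMode == OpMode::NONE && config.shiftMode == OpMode::NONE)
        throw std::logic_error("at least one of scale/shift must be enabled");
    if (config.scaleMode == OpMode::UNTRAINABLE && config.shiftMode == OpMode::UNTRAINABLE)
        throw std::logic_error("scale and shift both in untrainable mode is not supported");
    return config;
}

For the Darknet scale_channels layer this presumably resolves to scaleMode == UNTRAINABLE (the per-channel factors come from the second input) and shiftMode == NONE, which is the case the newly enabled batch test exercises on the CUDA backend.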
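
Note on the broadcasting logic in forward(): the parameters scale/shift a contiguous range of axes, so end_axis is the first axis for which the input extents over [axis, end_axis) multiply to exactly mid_size = num_parameters / parameter_batch_size, and everything from end_axis up to the rank forms the inner size that each parameter is broadcast across. Below is a runnable sketch of that search, assuming plain std::size_t extents; size_range and find_end_axis are hypothetical stand-ins for the csl::TensorView methods used by the patch.

#include <cassert>
#include <cstddef>
#include <vector>

/* product of the extents over the half-open axis range [start, end) */
static std::size_t size_range(const std::vector<std::size_t>& shape, std::size_t start, std::size_t end)
{
    std::size_t size = 1;
    for (auto i = start; i < end; i++)
        size *= shape[i];
    return size;
}

/* returns the first end_axis such that the extents in [axis, end_axis)
 * multiply to exactly mid_size; the parameters are then broadcast across
 * the remaining inner axes [end_axis, rank)
 */
static int find_end_axis(const std::vector<std::size_t>& shape, std::size_t axis, std::size_t mid_size)
{
    for (auto endAxis = axis + 1; endAxis <= shape.size(); endAxis++)
        if (size_range(shape, axis, endAxis) == mid_size)
            return static_cast<int>(endAxis);
    assert(0 && "failed to find a broadcast config");
    return -1;
}

int main()
{
    /* NCHW input [2, 64, 56, 56] with per-channel parameters at axis = 1:
     * 64 parameters match the range [1, 2), so end_axis == 2 and the
     * inner broadcast size is 56 * 56
     */
    std::vector<std::size_t> shape{ 2, 64, 56, 56 };
    assert(find_end_axis(shape, 1, 64) == 2);

    /* untrainable mode with a [2, 64] second input: num_parameters == 128,
     * parameter_batch_size == 2, so mid_size == 64 and the search is unchanged
     */
    assert(find_end_axis(shape, 1, (2 * 64) / 2) == 2);
    return 0;
}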