Merge pull request #16868 from YashasSamaga:cuda4dnn-scale-fix_and_improvements

pull/16888/head
Alexander Alekhin 5 years ago
commit 4dfa798e75
  1. modules/dnn/src/cuda4dnn/primitives/scale_shift.hpp (92 changed lines)
  2. modules/dnn/src/layers/scale_layer.cpp (47 changed lines)
  3. modules/dnn/test/test_darknet_importer.cpp (4 changed lines)

modules/dnn/src/cuda4dnn/primitives/scale_shift.hpp

@@ -19,25 +19,51 @@
 namespace cv { namespace dnn { namespace cuda4dnn {
 
+    struct ScaleShiftConfiguration {
+        enum class OpMode {
+            NONE,
+            TRAINABLE, /* use a pretrained blob */
+            UNTRAINABLE /* use another input */
+        };
+
+        OpMode scaleMode;
+        OpMode shiftMode;
+
+        std::size_t axis;
+    };
+
     template <class T>
     class ScaleShiftOp final : public CUDABackendNode {
     public:
         using wrapper_type = GetCUDABackendWrapperType<T>;
 
-        ScaleShiftOp(csl::Stream stream_, std::size_t axis, const cv::Mat& weights, const cv::Mat& bias)
-            : stream(std::move(stream_)), axis{ axis }
+        ScaleShiftOp(csl::Stream stream_, const ScaleShiftConfiguration& config, const cv::Mat& weights, const cv::Mat& bias)
+            : stream(std::move(stream_)), axis{ config.axis }
         {
-            if (!weights.empty())
+            scaleMode = config.scaleMode;
+            if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
+                CV_Assert(!weights.empty());
                 weightsTensor = csl::makeTensorHeader<T>(weights);
                 csl::copyMatToTensor<T>(weights, weightsTensor, stream);
             }
 
-            if (!bias.empty())
+            shiftMode = config.shiftMode;
+            if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
             {
+                CV_Assert(!bias.empty());
                 biasTensor = csl::makeTensorHeader<T>(bias);
                 csl::copyMatToTensor<T>(bias, biasTensor, stream);
             }
+
+            CV_Assert(scaleMode != ScaleShiftConfiguration::OpMode::NONE ||
+                      shiftMode != ScaleShiftConfiguration::OpMode::NONE);
+
+            if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE &&
+                shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
+            {
+                CV_Error(cv::Error::StsNotImplemented, "scale and shift both in untrainable mode is not supported");
+            }
         }
 
         void forward(
@@ -53,40 +79,60 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
             auto output = output_wrapper->getSpan();
 
+            /* number of batches in the weights/bias
+             * trainable mode: same for all batches
+             * untrainable mode: could be different for different batch samples
+             */
+            std::size_t parameter_batch_size = 1;
+
             csl::TensorView<T> weights;
-            if (weightsTensor.empty() && biasTensor.empty())
+            if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
+            {
+                CV_Assert(!weightsTensor.empty());
+                weights = csl::TensorView<T>(weightsTensor);
+            }
+            else if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
             {
                 CV_Assert(inputs.size() == 2);
 
-                /* no explicit scale/shift values provided; use the second input as weights */
                 auto wrapper = inputs[1].dynamicCast<wrapper_type>();
                 weights = wrapper->getView();
+
+                parameter_batch_size = weights.get_axis_size(0);
+                CV_Assert(parameter_batch_size == input.get_axis_size(0));
             }
-            else if (!weightsTensor.empty())
-            {
-                weights = csl::TensorSpan<T>(weightsTensor);
-            }
 
             csl::TensorView<T> bias;
-            if (!biasTensor.empty())
-                bias = csl::TensorSpan<T>(biasTensor);
+            if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE)
+            {
+                CV_Assert(!biasTensor.empty());
+                bias = csl::TensorView<T>(biasTensor);
+            }
+            else if (shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE)
+            {
+                CV_Assert(inputs.size() == 2);
+
+                auto wrapper = inputs[1].dynamicCast<wrapper_type>();
+                bias = wrapper->getView();
+
+                parameter_batch_size = bias.get_axis_size(0);
+                CV_Assert(parameter_batch_size == input.get_axis_size(0));
+            }
 
-            const auto numParams = !weights.empty() ? weights.size() : bias.size();
-            CV_Assert(numParams != 0);
-            if (!weightsTensor.empty() && !biasTensor.empty())
+            CV_Assert(!weights.empty() || !bias.empty());
+            if (!weights.empty() && !bias.empty())
             {
-                CV_CheckEQ(weights.size(), bias.size(), "weights and bias size are not equal");
+                CV_CheckEQ(weights.size(), bias.size(), "different broadcasting options for weights and bias is not supported");
             }
 
-            /* the weights/bias might require broadcasting to scale/shift */
+            const auto num_parameters = !weights.empty() ? weights.size() : bias.size();
+            const auto mid_size = num_parameters / parameter_batch_size;
+
+            /* the scale shift operation might require broadcasting */
             const int end_axis = [&] {
-                for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++)
-                {
-                    std::size_t size = input.size_range(axis, endAxis);
-                    if (size == numParams)
+                for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++) {
+                    if (input.size_range(axis, endAxis) == mid_size)
                         return endAxis;
                 }
-                CV_Assert(0 /* invalid weights matrix */);
+                CV_Assert(0 /* failed to find a broadcast config */);
             }();
 
             std::size_t inner_size = input.size_range(end_axis, input.rank());
@@ -103,6 +149,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
         csl::Stream stream;
         csl::Tensor<T> weightsTensor, biasTensor;
         std::size_t axis;
+
+        ScaleShiftConfiguration::OpMode scaleMode, shiftMode;
     };
 
 }}} /* namespace cv::dnn::cuda4dnn */
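
The reworked forward() resolves broadcasting by viewing the input as [outer, mid, inner], where mid is the number of parameters per batch sample (num_parameters / parameter_batch_size) and end_axis marks where the mid block ends. Below is a minimal standalone sketch of that shape computation, not OpenCV code: the shapes are made up for illustration (a [4, 64, 56, 56] input scaled along axis 1 by a second input carrying 64 values per sample).

```cpp
#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

int main()
{
    std::vector<std::size_t> input_shape{4, 64, 56, 56}; // NCHW input (illustrative)
    std::size_t axis = 1;                 // scale/shift starts at the channel axis
    std::size_t parameter_batch_size = 4; // leading axis of the second input (per-sample parameters)
    std::size_t num_parameters = 4 * 64;  // total elements in the second input
    std::size_t mid_size = num_parameters / parameter_batch_size; // 64

    // find end_axis so that the axis sizes in [axis, end_axis) multiply to mid_size
    std::size_t end_axis = axis;
    std::size_t size = 1;
    while (end_axis < input_shape.size() && size != mid_size)
        size *= input_shape[end_axis++];
    assert(size == mid_size); // end_axis == 2 for these shapes

    // the operation can then treat the input as [outer, mid, inner] = [4, 64, 56*56]
    // and broadcast one parameter per (sample, channel) pair
    std::size_t inner_size = std::accumulate(input_shape.begin() + end_axis, input_shape.end(),
                                             std::size_t(1), std::multiplies<std::size_t>());
    (void)inner_size;
    return 0;
}
```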

modules/dnn/src/layers/scale_layer.cpp

@@ -159,14 +159,49 @@ public:
         CV_Assert(!blobs.empty() || inputs.size() == 2);
 
-        cv::Mat weightsMat = hasWeights ? blobs[0] : Mat();
+        auto weightsMat = Mat(), biasMat = Mat();
 
-        /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
-         * in either case, it is at the end of the blobs vector => bias = blobs.back()
-         */
-        cv::Mat biasMat = hasBias ? blobs.back() : Mat();
+        cuda4dnn::ScaleShiftConfiguration config;
+        if (hasWeights)
+        {
+            if (blobs.empty())
+            {
+                config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
+            }
+            else
+            {
+                weightsMat = blobs[0];
+                config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
+            }
+        }
+        else
+        {
+            config.scaleMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
+        }
+
+        if (hasBias)
+        {
+            if(blobs.empty())
+            {
+                config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::UNTRAINABLE;
+            }
+            else
+            {
+                /* if the weights are provided, bias will be in blobs[1]; otherwise, it will be in blobs[0]
+                 * in either case, it is at the end of the blobs vector => bias = blobs.back()
+                 */
+                biasMat = blobs.back();
+                config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::TRAINABLE;
+            }
+        }
+        else
+        {
+            config.shiftMode = cuda4dnn::ScaleShiftConfiguration::OpMode::NONE;
+        }
+
+        config.axis = axis;
 
-        return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), axis, weightsMat, biasMat);
+        return make_cuda_node<cuda4dnn::ScaleShiftOp>(preferableTarget, std::move(context->stream), config, weightsMat, biasMat);
     }
 #endif
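
The initCUDA() rewrite maps the layer's state onto the new ScaleShiftConfiguration: a term (scale or shift) is NONE when the layer does not apply it, TRAINABLE when a learned blob supplies it, and UNTRAINABLE when it must come from the second input. A minimal sketch of that decision rule as a standalone function, with hypothetical names and not OpenCV code:

```cpp
#include <cstdio>

// Standalone sketch: the mode-selection rule the block above implements.
enum class OpMode { NONE, TRAINABLE, UNTRAINABLE };

static OpMode select_mode(bool wanted, bool have_blobs)
{
    if (!wanted)
        return OpMode::NONE;                 /* the term is not applied at all */
    return have_blobs ? OpMode::TRAINABLE    /* parameters come from a learned blob */
                      : OpMode::UNTRAINABLE; /* parameters come from the second input */
}

int main()
{
    /* Darknet scale_channels: scale requested, no learned blobs, no bias */
    std::printf("scale: %d, shift: %d\n",
                static_cast<int>(select_mode(/*wanted=*/true,  /*have_blobs=*/false)),
                static_cast<int>(select_mode(/*wanted=*/false, /*have_blobs=*/false)));
    return 0;
}
```

For the Darknet scale_channels case this yields scaleMode = UNTRAINABLE and shiftMode = NONE, which is the combination the updated test below now exercises with batches on the CUDA backend.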

modules/dnn/test/test_darknet_importer.cpp

@@ -580,8 +580,8 @@ TEST_P(Test_Darknet_layers, convolutional)
 TEST_P(Test_Darknet_layers, scale_channels)
 {
-    // TODO: test fails for batches due to a bug/missing feature in ScaleLayer
-    testDarknetLayer("scale_channels", false, false);
+    bool testBatches = backend == DNN_BACKEND_CUDA;
+    testDarknetLayer("scale_channels", false, testBatches);
 }
 
 TEST_P(Test_Darknet_layers, connected)
