From 490908f0fffcf08b6e076c2b931a78dfb2df9667 Mon Sep 17 00:00:00 2001
From: Yashas Samaga B L
Date: Tue, 10 Mar 2020 15:15:19 +0530
Subject: [PATCH] Merge pull request #16436 from
 YashasSamaga:feature-enetb0-yolo

dnn(darknet-importer): add grouped convolutions, sigmoid, swish, scale_channels

* update darknet importer to support enetb0-yolo

* remove dropout (pr16438) and fix formatting

* add test for scale_channels

* disable batch testing for scale channels

* do not set LayerParams::name

* merge all activations into setActivation
---
 modules/dnn/src/darknet/darknet_io.cpp     | 80 ++++++++++++++++++----
 modules/dnn/test/test_darknet_importer.cpp | 10 ++-
 2 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp
index b93d740109..ff322bc188 100644
--- a/modules/dnn/src/darknet/darknet_io.cpp
+++ b/modules/dnn/src/darknet/darknet_io.cpp
@@ -149,7 +149,7 @@ namespace cv {
             }
 
             void setConvolution(int kernel, int pad, int stride,
-                int filters_num, int channels_num, int use_batch_normalize)
+                int filters_num, int channels_num, int groups, int use_batch_normalize)
             {
                 cv::dnn::LayerParams conv_param =
                     getParamConvolution(kernel, pad, stride, filters_num);
@@ -162,6 +162,8 @@ namespace cv {
                     conv_param.set("bias_term", true);
                 }
 
+                conv_param.set("group", groups);
+
                 lp.layer_name = layer_name;
                 lp.layer_type = conv_param.type;
                 lp.layerParams = conv_param;
@@ -215,15 +217,30 @@ namespace cv {
                 fused_layer_names.push_back(last_layer);
             }
 
-            void setReLU()
+            void setActivation(String type)
             {
                 cv::dnn::LayerParams activation_param;
-                activation_param.set("negative_slope", 0.1f);
-                activation_param.name = "ReLU-name";
-                activation_param.type = "ReLU";
+                if (type == "relu")
+                {
+                    activation_param.set("negative_slope", 0.1f);
+                    activation_param.type = "ReLU";
+                }
+                else if (type == "swish")
+                {
+                    activation_param.type = "Swish";
+                }
+                else if (type == "logistic")
+                {
+                    activation_param.type = "Sigmoid";
+                }
+                else
+                {
+                    CV_Error(cv::Error::StsParseError, "Unsupported activation: " + type);
+                }
+
+                std::string layer_name = cv::format("%s_%d", type.c_str(), layer_id);
 
                 darknet::LayerParameter lp;
-                std::string layer_name = cv::format("relu_%d", layer_id);
                 lp.layer_name = layer_name;
                 lp.layer_type = activation_param.type;
                 lp.layerParams = activation_param;
@@ -487,6 +504,25 @@ namespace cv {
                 fused_layer_names.push_back(last_layer);
             }
 
+            void setScaleChannels(int from)
+            {
+                cv::dnn::LayerParams shortcut_param;
+                shortcut_param.type = "Scale";
+
+                darknet::LayerParameter lp;
+                std::string layer_name = cv::format("scale_channels_%d", layer_id);
+                lp.layer_name = layer_name;
+                lp.layer_type = shortcut_param.type;
+                lp.layerParams = shortcut_param;
+                lp.bottom_indexes.push_back(fused_layer_names.at(from));
+                lp.bottom_indexes.push_back(last_layer);
+                last_layer = layer_name;
+                net->layers.push_back(lp);
+
+                layer_id++;
+                fused_layer_names.push_back(last_layer);
+            }
+
             void setUpsample(int scaleFactor)
             {
                 cv::dnn::LayerParams param;
@@ -608,6 +644,7 @@ namespace cv {
                     int padding = getParam<int>(layer_params, "padding", 0);
                    int stride = getParam<int>(layer_params, "stride", 1);
                    int filters = getParam<int>(layer_params, "filters", -1);
+                    int groups = getParam<int>(layer_params, "groups", 1);
                     bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
                     int flipped = getParam<int>(layer_params, "flipped", 0);
                     if (flipped == 1)
@@ -618,9 +655,10 @@ namespace cv {
                     CV_Assert(kernel_size > 0 && filters > 0);
                     CV_Assert(tensor_shape[0] > 0);
+                    CV_Assert(tensor_shape[0] % groups == 0);
 
                     setParams.setConvolution(kernel_size, padding, stride, filters, tensor_shape[0],
-                        batch_normalize);
+                        groups, batch_normalize);
 
                     tensor_shape[0] = filters;
                     tensor_shape[1] = (tensor_shape[1] - kernel_size + 2 * padding) / stride + 1;
                     tensor_shape[2] = (tensor_shape[2] - kernel_size + 2 * padding) / stride + 1;
@@ -727,6 +765,14 @@ namespace cv {
                     from = from < 0 ? from + layers_counter : from;
                     setParams.setShortcut(from, alpha);
                 }
+                else if (layer_type == "scale_channels")
+                {
+                    std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
+                    CV_Assert(!bottom_layer.empty());
+                    int from = std::atoi(bottom_layer.c_str());
+                    from = from < 0 ? from + layers_counter : from;
+                    setParams.setScaleChannels(from);
+                }
                 else if (layer_type == "upsample")
                 {
                     int scaleFactor = getParam<int>(layer_params, "stride", 1);
@@ -761,7 +807,15 @@ namespace cv {
                 std::string activation = getParam<std::string>(layer_params, "activation", "linear");
                 if (activation == "leaky")
                 {
-                    setParams.setReLU();
+                    setParams.setActivation("relu");
+                }
+                else if (activation == "swish")
+                {
+                    setParams.setActivation("swish");
+                }
+                else if (activation == "logistic")
+                {
+                    setParams.setActivation("logistic");
                 }
                 else if (activation != "linear")
                     CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
@@ -818,13 +872,15 @@ namespace cv {
                 {
                     int kernel_size = getParam<int>(layer_params, "size", -1);
                     filters = getParam<int>(layer_params, "filters", -1);
+                    int groups = getParam<int>(layer_params, "groups", 1);
                     use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
 
                     CV_Assert(kernel_size > 0 && filters > 0);
                     CV_Assert(tensor_shape[0] > 0);
+                    CV_Assert(tensor_shape[0] % groups == 0);
 
-                    weights_size = filters * tensor_shape[0] * kernel_size * kernel_size;
-                    int sizes_weights[] = { filters, tensor_shape[0], kernel_size, kernel_size };
+                    weights_size = filters * (tensor_shape[0] / groups) * kernel_size * kernel_size;
+                    int sizes_weights[] = { filters, tensor_shape[0] / groups, kernel_size, kernel_size };
                     weightsBlob.create(4, sizes_weights, CV_32F);
                 }
                 else
@@ -879,8 +935,8 @@ namespace cv {
                 }
 
                 std::string activation = getParam<std::string>(layer_params, "activation", "linear");
-                if(activation == "leaky")
-                    ++cv_layers_counter; // For ReLU
+                if(activation == "leaky" || activation == "swish" || activation == "logistic")
+                    ++cv_layers_counter; // For ReLU, Swish, Sigmoid
 
                 if(!darknet_layers_counter)
                     tensor_shape.resize(1);
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index 068f85eb48..58faaa1388 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -97,7 +97,7 @@ TEST(Test_Darknet, read_yolo_voc_stream)
 class Test_Darknet_layers : public DNNTestLayer
 {
 public:
-    void testDarknetLayer(const std::string& name, bool hasWeights = false)
+    void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true)
     {
         SCOPED_TRACE(name);
         Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
@@ -117,7 +117,7 @@ public:
         Mat out = net.forward();
         normAssert(out, ref, "", default_l1, default_lInf);
 
-        if (inp.size[0] == 1) // test handling of batch size
+        if (inp.size[0] == 1 && testBatchProcessing) // test handling of batch size
         {
             SCOPED_TRACE("batch size 2");
 
@@ -552,6 +552,12 @@ TEST_P(Test_Darknet_layers, convolutional)
     testDarknetLayer("convolutional", true);
 }
 
+TEST_P(Test_Darknet_layers, scale_channels)
+{
+    // TODO: test fails for batches due to a bug/missing feature in ScaleLayer
+    testDarknetLayer("scale_channels", false, false);
+}
+
 TEST_P(Test_Darknet_layers, connected)
 {
     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
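
Note (illustration added by the editor, not part of the patch): the weights_size change above is the core of the grouped-convolution support. Each filter now convolves only tensor_shape[0] / groups input channels, so a 3x3 convolution with 96 input channels, 96 filters and groups=96 (depthwise, as in EfficientNet-B0) stores 96 * (96/96) * 3 * 3 = 864 weights instead of 96 * 96 * 3 * 3 = 82944. Below is a minimal sketch of how a Darknet model that relies on the newly supported grouped convolutions, swish/logistic activations and scale_channels layers could be loaded and run through the public cv::dnn API; the cfg/weights/image file names are hypothetical placeholders.

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgcodecs.hpp>

    int main()
    {
        // Parse a Darknet config/weights pair that uses grouped convolutions,
        // swish/logistic activations and scale_channels layers
        // (file names are placeholders).
        cv::dnn::Net net = cv::dnn::readNetFromDarknet("enetb0-yolo.cfg",
                                                       "enetb0-yolo.weights");

        // Usual Darknet preprocessing: scale to [0,1], swap BGR->RGB, no crop.
        cv::Mat img = cv::imread("input.jpg");
        cv::Mat blob = cv::dnn::blobFromImage(img, 1 / 255.0, cv::Size(416, 416),
                                              cv::Scalar(), true, false);

        net.setInput(blob);
        cv::Mat out = net.forward(); // output of the final region/yolo layer
        return 0;
    }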