diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp
index 713624f728..5e1b125a0c 100644
--- a/modules/dnn/src/darknet/darknet_io.cpp
+++ b/modules/dnn/src/darknet/darknet_io.cpp
@@ -67,6 +67,7 @@
 //M*/
 
 #include "../precomp.hpp"
+#include <cmath>
 
 #include <fstream>
 #include <sstream>
@@ -109,6 +110,26 @@ namespace cv {
                 params.blobs = blobs;
             }
 
+            void setBatchNorm()
+            {
+                cv::dnn::LayerParams bn_param;
+
+                bn_param.name = "BatchNorm-name";
+                bn_param.type = "BatchNorm";
+                bn_param.set("has_weight", true);
+                bn_param.set("has_bias", true);
+                bn_param.set("eps", 1E-6);  // .000001f in Darknet Yolo
+
+                darknet::LayerParameter lp;
+                std::string layer_name = cv::format("bn_%d", layer_id);
+                lp.layer_name = layer_name;
+                lp.layer_type = bn_param.type;
+                lp.layerParams = bn_param;
+                lp.bottom_indexes.push_back(last_layer);
+                last_layer = layer_name;
+                net->layers.push_back(lp);
+            }
+
             cv::dnn::LayerParams getParamConvolution(int kernel, int pad,
                 int stride, int filters_num)
             {
@@ -149,25 +170,47 @@ namespace cv {
                 net->layers.push_back(lp);
 
                 if (use_batch_normalize)
-                {
-                    cv::dnn::LayerParams bn_param;
-
-                    bn_param.name = "BatchNorm-name";
-                    bn_param.type = "BatchNorm";
-                    bn_param.set("has_weight", true);
-                    bn_param.set("has_bias", true);
-                    bn_param.set("eps", 1E-6);  // .000001f in Darknet Yolo
-
-                    darknet::LayerParameter lp;
-                    std::string layer_name = cv::format("bn_%d", layer_id);
-                    lp.layer_name = layer_name;
-                    lp.layer_type = bn_param.type;
-                    lp.layerParams = bn_param;
-                    lp.bottom_indexes.push_back(last_layer);
-                    last_layer = layer_name;
-                    net->layers.push_back(lp);
+                    setBatchNorm();
+
+                layer_id++;
+                fused_layer_names.push_back(last_layer);
+            }
+
+            cv::dnn::LayerParams getParamFullyConnected(int output)
+            {
+                cv::dnn::LayerParams params;
+                params.name = "FullyConnected-name";
+                params.type = "InnerProduct";
+
+                params.set("bias_term", false);  // true only if(BatchNorm == false)
+                params.set("num_output", output);
+
+                return params;
+            }
+
+            void setFullyConnected(int output, int use_batch_normalize)
+            {
+                cv::dnn::LayerParams fullyconnected_param =
+                    getParamFullyConnected(output);
+
+                darknet::LayerParameter lp;
+                std::string layer_name = cv::format("fullyConnected_%d", layer_id);
+
+                // use BIAS in any case
+                if (!use_batch_normalize) {
+                    fullyconnected_param.set("bias_term", true);
+                }
+                lp.layer_name = layer_name;
+                lp.layer_type = fullyconnected_param.type;
+                lp.layerParams = fullyconnected_param;
+                lp.bottom_indexes.push_back(last_layer);
+                last_layer = layer_name;
+                net->layers.push_back(lp);
+
+                if (use_batch_normalize)
+                    setBatchNorm();
 
                 layer_id++;
                 fused_layer_names.push_back(last_layer);
             }
@@ -191,18 +234,21 @@ namespace cv {
                 fused_layer_names.back() = last_layer;
             }
 
-            void setMaxpool(size_t kernel, size_t pad, size_t stride)
+            void setMaxpool(int kernel, int pad, int stride)
             {
                 cv::dnn::LayerParams maxpool_param;
                 maxpool_param.set("pool", "max");
                 maxpool_param.set("kernel_size", kernel);
-                maxpool_param.set("pad", pad);
+                maxpool_param.set("pad_l", floor((float)pad / 2));
+                maxpool_param.set("pad_r", ceil((float)pad / 2));
+                maxpool_param.set("pad_t", floor((float)pad / 2));
+                maxpool_param.set("pad_b", ceil((float)pad / 2));
+                maxpool_param.set("ceil_mode", false);
                 maxpool_param.set("stride", stride);
-                maxpool_param.set("pad_mode", "SAME");
                 maxpool_param.name = "Pooling-name";
                 maxpool_param.type = "Pooling";
-                darknet::LayerParameter lp;
+                darknet::LayerParameter lp;
                 std::string layer_name = cv::format("pool_%d", layer_id);
                 lp.layer_name = layer_name;
                 lp.layer_type = maxpool_param.type;
@@ -539,7 +585,10 @@ namespace cv {
             net->channels = getParam<int>(net_params, "channels", 3);
             CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0);
 
-            int current_channels = net->channels;
+            MatShape tensor_shape(3);
+            tensor_shape[0] = net->channels;
+            tensor_shape[1] = net->width;
+            tensor_shape[2] = net->height;
             net->out_channels_vec.resize(net->layers_cfg.size());
 
             layers_counter = -1;
@@ -568,23 +617,46 @@ namespace cv {
                         padding = kernel_size / 2;
 
                     CV_Assert(kernel_size > 0 && filters > 0);
-                    CV_Assert(current_channels > 0);
+                    CV_Assert(tensor_shape[0] > 0);
 
-                    setParams.setConvolution(kernel_size, padding, stride, filters, current_channels,
+                    setParams.setConvolution(kernel_size, padding, stride, filters, tensor_shape[0],
                         batch_normalize);
 
-                    current_channels = filters;
+                    tensor_shape[0] = filters;
+                    tensor_shape[1] = (tensor_shape[1] - kernel_size + 2 * padding) / stride + 1;
+                    tensor_shape[2] = (tensor_shape[2] - kernel_size + 2 * padding) / stride + 1;
+                }
+                else if (layer_type == "connected")
+                {
+                    int output = getParam<int>(layer_params, "output", 1);
+                    bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
+
+                    CV_Assert(output > 0);
+
+                    setParams.setFullyConnected(output, batch_normalize);
+
+                    if(layers_counter && tensor_shape[1] > 1)
+                        net->out_channels_vec[layers_counter-1] = total(tensor_shape);
+
+                    tensor_shape[0] = output;
+                    tensor_shape[1] = 1;
+                    tensor_shape[2] = 1;
                 }
                 else if (layer_type == "maxpool")
                 {
                     int kernel_size = getParam<int>(layer_params, "size", 2);
                     int stride = getParam<int>(layer_params, "stride", 2);
-                    int pad = getParam<int>(layer_params, "pad", 0);
-                    setParams.setMaxpool(kernel_size, pad, stride);
+                    int padding = getParam<int>(layer_params, "padding", kernel_size - 1);
+                    setParams.setMaxpool(kernel_size, padding, stride);
+
+                    tensor_shape[1] = (tensor_shape[1] - kernel_size + padding) / stride + 1;
+                    tensor_shape[2] = (tensor_shape[2] - kernel_size + padding) / stride + 1;
                 }
                 else if (layer_type == "avgpool")
                 {
                     setParams.setAvgpool();
+                    tensor_shape[1] = 1;
+                    tensor_shape[2] = 1;
                 }
                 else if (layer_type == "softmax")
                 {
@@ -599,10 +671,10 @@ namespace cv {
                     CV_Assert(!bottom_layers.empty());
                     std::vector<int> layers_vec = getNumbers<int>(bottom_layers);
 
-                    current_channels = 0;
+                    tensor_shape[0] = 0;
                     for (size_t k = 0; k < layers_vec.size(); ++k) {
                         layers_vec[k] = layers_vec[k] >= 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
-                        current_channels += net->out_channels_vec[layers_vec[k]];
+                        tensor_shape[0] += net->out_channels_vec[layers_vec[k]];
                     }
 
                     if (layers_vec.size() == 1)
@@ -610,10 +682,16 @@ namespace cv {
                     else
                         setParams.setConcat(layers_vec.size(), layers_vec.data());
                 }
+                else if (layer_type == "dropout")
+                {
+                    setParams.setIdentity(layers_counter-1);
+                }
                 else if (layer_type == "reorg")
                 {
                     int stride = getParam<int>(layer_params, "stride", 2);
-                    current_channels = current_channels * (stride*stride);
+                    tensor_shape[0] = tensor_shape[0] * (stride * stride);
+                    tensor_shape[1] = tensor_shape[1] / stride;
+                    tensor_shape[2] = tensor_shape[2] / stride;
 
                     setParams.setReorg(stride);
                 }
@@ -653,6 +731,8 @@ namespace cv {
                 {
                     int scaleFactor = getParam<int>(layer_params, "stride", 1);
                     setParams.setUpsample(scaleFactor);
+                    tensor_shape[1] = tensor_shape[1] * scaleFactor;
+                    tensor_shape[2] = tensor_shape[2] * scaleFactor;
                 }
                 else if (layer_type == "yolo")
                 {
@@ -686,7 +766,7 @@ namespace cv {
                 else if (activation != "linear")
                     CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
 
-                net->out_channels_vec[layers_counter] = current_channels;
+                net->out_channels_vec[layers_counter] = tensor_shape[0];
             }
 
             return true;
@@ -712,7 +792,10 @@ namespace cv {
             if(transpose)
                 CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented");
 
-            int current_channels = net->channels;
+            MatShape tensor_shape(3);
+            tensor_shape[0] = net->channels;
+            tensor_shape[1] = net->width;
+            tensor_shape[2] = net->height;
             int cv_layers_counter = -1;
             int darknet_layers_counter = -1;
 
@@ -725,19 +808,36 @@ namespace cv {
                 std::map<std::string, std::string> &layer_params = i->second;
                 std::string layer_type = layer_params["type"];
 
-                if (layer_type == "convolutional")
+                if (layer_type == "convolutional" || layer_type == "connected")
                 {
-                    int kernel_size = getParam<int>(layer_params, "size", -1);
-                    int filters = getParam<int>(layer_params, "filters", -1);
-                    bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
+                    size_t weights_size;
+                    int filters;
+                    bool use_batch_normalize;
+                    cv::Mat weightsBlob;
+                    if(layer_type == "convolutional")
+                    {
+                        int kernel_size = getParam<int>(layer_params, "size", -1);
+                        filters = getParam<int>(layer_params, "filters", -1);
+                        use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
+
+                        CV_Assert(kernel_size > 0 && filters > 0);
+                        CV_Assert(tensor_shape[0] > 0);
+
+                        weights_size = filters * tensor_shape[0] * kernel_size * kernel_size;
+                        int sizes_weights[] = { filters, tensor_shape[0], kernel_size, kernel_size };
+                        weightsBlob.create(4, sizes_weights, CV_32F);
+                    }
+                    else
+                    {
+                        filters = getParam<int>(layer_params, "output", 1);
+                        use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
 
-                    CV_Assert(kernel_size > 0 && filters > 0);
-                    CV_Assert(current_channels > 0);
+                        CV_Assert(filters>0);
 
-                    size_t const weights_size = filters * current_channels * kernel_size * kernel_size;
-                    int sizes_weights[] = { filters, current_channels, kernel_size, kernel_size };
-                    cv::Mat weightsBlob;
-                    weightsBlob.create(4, sizes_weights, CV_32F);
+                        weights_size = total(tensor_shape) * filters;
+                        int sizes_weights[] = { filters, total(tensor_shape) };
+                        weightsBlob.create(2, sizes_weights, CV_32F);
+                    }
                     CV_Assert(weightsBlob.isContinuous());
 
                     cv::Mat meanData_mat(1, filters, CV_32F);    // mean
@@ -753,14 +853,14 @@ namespace cv {
                     }
                     ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size);
 
-                    // set convolutional weights
-                    std::vector<cv::Mat> conv_blobs;
-                    conv_blobs.push_back(weightsBlob);
+                    // set conv/connected weights
+                    std::vector<cv::Mat> layer_blobs;
+                    layer_blobs.push_back(weightsBlob);
                     if (!use_batch_normalize) {
                         // use BIAS in any case
-                        conv_blobs.push_back(biasData_mat);
+                        layer_blobs.push_back(biasData_mat);
                     }
-                    setParams.setLayerBlobs(cv_layers_counter, conv_blobs);
+                    setParams.setLayerBlobs(cv_layers_counter, layer_blobs);
 
                     // set batch normalize (mean, variance, scale, bias)
                     if (use_batch_normalize) {
@@ -782,7 +882,10 @@ namespace cv {
                 if(activation == "leaky")
                     ++cv_layers_counter;  // For ReLU
-                current_channels = net->out_channels_vec[darknet_layers_counter];
+                if(!darknet_layers_counter)
+                    tensor_shape.resize(1);
+
+                tensor_shape[0] = net->out_channels_vec[darknet_layers_counter];
             }
 
             return true;
         }
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index 9cbed0f979..fb6cee8757 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -528,6 +528,11 @@ TEST_P(Test_Darknet_layers, reorg)
     testDarknetLayer("reorg");
 }
 
+TEST_P(Test_Darknet_layers, maxpool)
+{
+    testDarknetLayer("maxpool");
+}
+
 TEST_P(Test_Darknet_layers, convolutional)
 {
     if (target == DNN_TARGET_MYRIAD)
@@ -537,6 +542,13 @@ TEST_P(Test_Darknet_layers, convolutional)
     testDarknetLayer("convolutional", true);
 }
 
+TEST_P(Test_Darknet_layers, connected)
+{
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
+    testDarknetLayer("connected", true);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_layers, dnnBackendsAndTargets());
 
 }} // namespace
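
--
Two short sketches follow for reviewers; neither is part of the patch.

First, the shape bookkeeping. The importer now carries a full channels x
width x height shape (tensor_shape) through the cfg walk, because a
connected layer needs the flattened input size, total(tensor_shape), to
size its weight matrix. The standalone program below mirrors that
arithmetic with illustrative names (Shape, conv, maxpool and connected are
not from the patch). Note Darknet's maxpool default padding = size - 1,
which setMaxpool() splits into floor(pad/2) and ceil(pad/2) halves:

    #include <cstdio>

    struct Shape { int c, w, h; };  // channels, width, height

    // Mirrors the convolutional update: (x - kernel + 2*pad) / stride + 1
    static Shape conv(Shape s, int kernel, int pad, int stride, int filters)
    {
        return { filters,
                 (s.w - kernel + 2 * pad) / stride + 1,
                 (s.h - kernel + 2 * pad) / stride + 1 };
    }

    // Mirrors the maxpool update: (x - kernel + padding) / stride + 1
    static Shape maxpool(Shape s, int kernel, int padding, int stride)
    {
        return { s.c,
                 (s.w - kernel + padding) / stride + 1,
                 (s.h - kernel + padding) / stride + 1 };
    }

    // A connected layer consumes the flattened tensor and collapses the
    // spatial dimensions, as in the "connected" branch of the cfg parser.
    static Shape connected(Shape, int output)
    {
        return { output, 1, 1 };
    }

    int main()
    {
        Shape s = { 3, 416, 416 };
        s = conv(s, 3, 1, 1, 16);     // -> 16 x 416 x 416
        s = maxpool(s, 2, 2 - 1, 2);  // -> 16 x 208 x 208
        s = connected(s, 10);         // -> 10 x 1 x 1
        std::printf("%d x %d x %d\n", s.c, s.w, s.h);
        return 0;
    }

Second, a quick end-to-end check of the new "connected" branch. With the
patch applied, something like the following should load a topology-only
model: readNetFromDarknet's weights argument defaults to an empty string,
and without weights the importer still builds the layers (no forward pass
is attempted here). The file name connected.cfg is hypothetical:

    #include <fstream>
    #include <iostream>
    #include <vector>
    #include <opencv2/dnn.hpp>

    int main()
    {
        // Minimal cfg exercising the new "connected" branch of the parser.
        const char *cfg =
            "[net]\n"
            "width=32\n"
            "height=32\n"
            "channels=3\n"
            "\n"
            "[connected]\n"
            "output=10\n"
            "activation=linear\n";

        std::ofstream f("connected.cfg");  // hypothetical file name
        f << cfg;
        f.close();

        cv::dnn::Net net = cv::dnn::readNetFromDarknet("connected.cfg");
        std::vector<cv::String> names = net.getLayerNames();
        for (size_t i = 0; i < names.size(); ++i)
            std::cout << names[i] << std::endl;
        return 0;
    }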