Merge pull request #9705 from AlexeyAB:dnn_darknet_yolo_v2

7 years ago · b7ff9ddcdd
parent 0739f28e56 ecc34dc521
commit b7ff9ddcdd
11 changed files with 1764 additions and 1 deletions
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@ -527,6 +527,18 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
        static Ptr<PriorBoxLayer> create(const LayerParams& params);
    };
    /** @brief Reorg layer declared together with the Darknet (YOLO v2) importer.
     *
     * NOTE(review): presumably this is the implementation behind the "Reorg" layer type that the
     * Darknet cfg parser emits with an integer "reorg_stride" parameter -- confirm against the
     * layer registration, which is not part of this header.
     */
    class CV_EXPORTS ReorgLayer : public Layer
    {
    public:
        /** @brief Creates a ReorgLayer instance from the given layer parameters. */
        static Ptr<ReorgLayer> create(const LayerParams& params);
    };
    /** @brief Region layer declared together with the Darknet (YOLO v2) importer.
     *
     * NOTE(review): presumably this is the implementation behind the "Region" layer type that the
     * Darknet cfg parser emits (parameters such as "thresh", "coords", "classes", "anchors" and a
     * blob holding the anchor values) -- confirm against the layer registration, which is not part
     * of this header.
     */
    class CV_EXPORTS RegionLayer : public Layer
    {
    public:
        /** @brief Creates a RegionLayer instance from the given layer parameters. */
        static Ptr<RegionLayer> create(const LayerParams& params);
    };
    class CV_EXPORTS DetectionOutputLayer : public Layer
    {
    public:
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@ -612,6 +612,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
        virtual ~Importer();
    };
    /** @brief Reads a network model stored in <a href="https://pjreddie.com/darknet/">Darknet</a> model files.
    *  @param cfgFile      path to the .cfg file with the text description of the network architecture.
    *  @param darknetModel path to the .weights file with the learned network; when it is empty
    *                      (the default) no weights file is read.
    *  @returns Net object that is ready to do a forward pass; an exception is thrown in failure cases.
    *  @details This is a shortcut that creates a DarknetImporter and calls its populateNet method.
    */
    CV_EXPORTS_W Net readNetFromDarknet(const String &cfgFile, const String &darknetModel = String());
    /**
     *  @deprecated Use @ref readNetFromCaffe instead.
     *  @brief Creates the importer of <a href="http://caffe.berkeleyvision.org">Caffe</a> framework network.
--- a/modules/dnn/src/darknet/darknet_importer.cpp
+++ b/modules/dnn/src/darknet/darknet_importer.cpp
@ -0,0 +1,195 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //                        (3-clause BSD License)
 //
 // Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 // * Redistributions of source code must retain the above copyright notice,
 // this list of conditions and the following disclaimer.
 //
 // * Redistributions in binary form must reproduce the above copyright notice,
 // this list of conditions and the following disclaimer in the documentation
 // and/or other materials provided with the distribution.
 //
 // * Neither the names of the copyright holders nor the names of the contributors
 // may be used to endorse or promote products derived from this software
 // without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall copyright holders or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #include "../precomp.hpp"
 #include <iostream>
 #include <algorithm>
 #include <vector>
 #include <map>
 #include "darknet_io.hpp"
 namespace cv {
 namespace dnn {
 CV__DNN_EXPERIMENTAL_NS_BEGIN
 namespace
 {
 class DarknetImporter : public Importer
 {
    darknet::NetParameter net;
 public:
    DarknetImporter() {}
    DarknetImporter(const char *cfgFile, const char *darknetModel)
    {
        CV_TRACE_FUNCTION();
        ReadNetParamsFromCfgFileOrDie(cfgFile, &net);
        if (darknetModel && darknetModel[0])
            ReadNetParamsFromBinaryFileOrDie(darknetModel, &net);
    }
    struct BlobNote
    {
        BlobNote(const std::string &_name, int _layerId, int _outNum) :
            name(_name), layerId(_layerId), outNum(_outNum) {}
        std::string name;
        int layerId, outNum;
    };
    std::vector<BlobNote> addedBlobs;
    std::map<String, int> layerCounter;
    void populateNet(Net dstNet)
    {
        CV_TRACE_FUNCTION();
        int layersSize = net.layer_size();
        layerCounter.clear();
        addedBlobs.clear();
        addedBlobs.reserve(layersSize + 1);
        //setup input layer names
        {
            std::vector<String> netInputs(net.input_size());
            for (int inNum = 0; inNum < net.input_size(); inNum++)
            {
                addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
                netInputs[inNum] = net.input(inNum);
            }
            dstNet.setInputsNames(netInputs);
        }
        for (int li = 0; li < layersSize; li++)
        {
            const darknet::LayerParameter &layer = net.layer(li);
            String name = layer.name();
            String type = layer.type();
            LayerParams layerParams = layer.getLayerParams();
            int repetitions = layerCounter[name]++;
            if (repetitions)
                name += cv::format("_%d", repetitions);
            int id = dstNet.addLayer(name, type, layerParams);
            // iterate many bottoms layers (for example for: route -1, -4)
            for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
                addInput(layer.bottom(inNum), id, inNum, dstNet, layer.name());
            for (int outNum = 0; outNum < layer.top_size(); outNum++)
                addOutput(layer, id, outNum);
        }
        addedBlobs.clear();
    }
    void addOutput(const darknet::LayerParameter &layer, int layerId, int outNum)
    {
        const std::string &name = layer.top(outNum);
        bool haveDups = false;
        for (int idx = (int)addedBlobs.size() - 1; idx >= 0; idx--)
        {
            if (addedBlobs[idx].name == name)
            {
                haveDups = true;
                break;
            }
        }
        if (haveDups)
        {
            bool isInplace = layer.bottom_size() > outNum && layer.bottom(outNum) == name;
            if (!isInplace)
                CV_Error(Error::StsBadArg, "Duplicate blobs produced by multiple sources");
        }
        addedBlobs.push_back(BlobNote(name, layerId, outNum));
    }
    void addInput(const std::string &name, int layerId, int inNum, Net &dstNet, std::string nn)
    {
        int idx;
        for (idx = (int)addedBlobs.size() - 1; idx >= 0; idx--)
        {
            if (addedBlobs[idx].name == name)
                break;
        }
        if (idx < 0)
        {
            CV_Error(Error::StsObjectNotFound, "Can't find output blob \"" + name + "\"");
            return;
        }
        dstNet.connect(addedBlobs[idx].layerId, addedBlobs[idx].outNum, layerId, inNum);
    }
    ~DarknetImporter()
    {
    }
 };
 }
 // Builds a Net from a Darknet cfg file and an optional weights file
 // by running a temporary DarknetImporter over a fresh Net.
 Net readNetFromDarknet(const String &cfgFile, const String &darknetModel /*= String()*/)
 {
    const char *cfgPath = cfgFile.c_str();
    const char *weightsPath = darknetModel.c_str();
    DarknetImporter importer(cfgPath, weightsPath);

    Net loadedNet;
    importer.populateNet(loadedNet);
    return loadedNet;
 }
 CV__DNN_EXPERIMENTAL_NS_END
 }} // namespace
--- a/modules/dnn/src/darknet/darknet_io.cpp
+++ b/modules/dnn/src/darknet/darknet_io.cpp
@ -0,0 +1,624 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //                        (3-clause BSD License)
 //
 // Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 // * Redistributions of source code must retain the above copyright notice,
 // this list of conditions and the following disclaimer.
 //
 // * Redistributions in binary form must reproduce the above copyright notice,
 // this list of conditions and the following disclaimer in the documentation
 // and/or other materials provided with the distribution.
 //
 // * Neither the names of the copyright holders nor the names of the contributors
 // may be used to endorse or promote products derived from this software
 // without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall copyright holders or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //MIT License
 //
 //Copyright (c) 2017 Joseph Redmon
 //
 //Permission is hereby granted, free of charge, to any person obtaining a copy
 //of this software and associated documentation files (the "Software"), to deal
 //in the Software without restriction, including without limitation the rights
 //to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 //copies of the Software, and to permit persons to whom the Software is
 //furnished to do so, subject to the following conditions:
 //
 //The above copyright notice and this permission notice shall be included in all
 //copies or substantial portions of the Software.
 //
 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 //SOFTWARE.
 //
 //M*/
 #include <opencv2/core.hpp>
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include "darknet_io.hpp"
 namespace cv {
    namespace dnn {
        namespace darknet {
            /** @brief Reads a typed value from a string-to-string parameter map.
             *
             * @param params      parsed "name=value" entries of one cfg section.
             * @param param_name  key to look up.
             * @param init_val    default returned when the key is absent or its value cannot be parsed as T.
             * @returns the parsed value, or init_val.
             *
             * The value is parsed with stream extraction, so for std::string only the first
             * whitespace-delimited token is returned.
             */
            template<typename T>
            T getParam(const std::map<std::string, std::string> &params, const std::string &param_name, T init_val)
            {
                std::map<std::string, std::string>::const_iterator it = params.find(param_name);
                if (it == params.end())
                    return init_val;

                // Parse into a separate variable: a failed numeric extraction writes 0 into its
                // target, which would otherwise silently replace the caller's default.
                std::stringstream ss(it->second);
                T parsed_val = init_val;
                if (ss >> parsed_val)
                    return parsed_val;
                return init_val;
            }
            class setLayersParams {
                NetParameter *net;
                int layer_id;
                std::string last_layer;
                std::vector<std::string> fused_layer_names;
            public:
                setLayersParams(NetParameter *_net, std::string _first_layer = "data") :
                    net(_net), layer_id(0), last_layer(_first_layer)
                {}
                void setLayerBlobs(int i, std::vector<cv::Mat> blobs)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams &params = net->layers[i].layerParams;
                    params.blobs = blobs;
                }
                cv::dnn::experimental_dnn_v1::LayerParams getParamConvolution(int kernel, int pad,
                    int stride, int filters_num)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams params;
                    params.name = "Convolution-name";
                    params.type = "Convolution";
                    params.set<int>("kernel_size", kernel);
                    params.set<int>("pad", pad);
                    params.set<int>("stride", stride);
                    params.set<bool>("bias_term", false);	// true only if(BatchNorm == false)
                    params.set<int>("num_output", filters_num);
                    return params;
                }
                void setConvolution(int kernel, int pad, int stride,
                    int filters_num, int channels_num, int use_batch_normalize, int use_relu)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams conv_param =
                        getParamConvolution(kernel, pad, stride, filters_num);
                    darknet::LayerParameter lp;
                    std::string layer_name = cv::format("conv_%d", layer_id);
                    // use BIAS in any case
                    if (!use_batch_normalize) {
                        conv_param.set<bool>("bias_term", true);
                    }
                    lp.layer_name = layer_name;
                    lp.layer_type = conv_param.type;
                    lp.layerParams = conv_param;
                    lp.bottom_indexes.push_back(last_layer);
                    last_layer = layer_name;
                    net->layers.push_back(lp);
                    if (use_batch_normalize)
                    {
                        cv::dnn::experimental_dnn_v1::LayerParams bn_param;
                        bn_param.name = "BatchNorm-name";
                        bn_param.type = "BatchNorm";
                        bn_param.set<bool>("has_weight", true);
                        bn_param.set<bool>("has_bias", true);
                        bn_param.set<float>("eps", 1E-6);	// .000001f in Darknet Yolo
                        darknet::LayerParameter lp;
                        std::string layer_name = cv::format("bn_%d", layer_id);
                        lp.layer_name = layer_name;
                        lp.layer_type = bn_param.type;
                        lp.layerParams = bn_param;
                        lp.bottom_indexes.push_back(last_layer);
                        last_layer = layer_name;
                        net->layers.push_back(lp);
                    }
                    if (use_relu)
                    {
                        cv::dnn::experimental_dnn_v1::LayerParams activation_param;
                        activation_param.set<float>("negative_slope", 0.1f);
                        activation_param.name = "ReLU-name";
                        activation_param.type = "ReLU";
                        darknet::LayerParameter lp;
                        std::string layer_name = cv::format("relu_%d", layer_id);
                        lp.layer_name = layer_name;
                        lp.layer_type = activation_param.type;
                        lp.layerParams = activation_param;
                        lp.bottom_indexes.push_back(last_layer);
                        last_layer = layer_name;
                        net->layers.push_back(lp);
                    }
                    layer_id++;
                    fused_layer_names.push_back(last_layer);
                }
                void setMaxpool(size_t kernel, size_t pad, size_t stride)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams maxpool_param;
                    maxpool_param.set<cv::String>("pool", "max");
                    maxpool_param.set<int>("kernel_size", kernel);
                    maxpool_param.set<int>("pad", pad);
                    maxpool_param.set<int>("stride", stride);
                    maxpool_param.set<cv::String>("pad_mode", "SAME");
                    maxpool_param.name = "Pooling-name";
                    maxpool_param.type = "Pooling";
                    darknet::LayerParameter lp;
                    std::string layer_name = cv::format("pool_%d", layer_id);
                    lp.layer_name = layer_name;
                    lp.layer_type = maxpool_param.type;
                    lp.layerParams = maxpool_param;
                    lp.bottom_indexes.push_back(last_layer);
                    last_layer = layer_name;
                    net->layers.push_back(lp);
                    layer_id++;
                    fused_layer_names.push_back(last_layer);
                }
                void setConcat(int number_of_inputs, int *input_indexes)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams concat_param;
                    concat_param.name = "Concat-name";
                    concat_param.type = "Concat";
                    concat_param.set<int>("axis", 1);	// channels are in axis = 1
                    darknet::LayerParameter lp;
                    std::string layer_name = cv::format("concat_%d", layer_id);
                    lp.layer_name = layer_name;
                    lp.layer_type = concat_param.type;
                    lp.layerParams = concat_param;
                    for (int i = 0; i < number_of_inputs; ++i)
                        lp.bottom_indexes.push_back(fused_layer_names.at(input_indexes[i]));
                    last_layer = layer_name;
                    net->layers.push_back(lp);
                    layer_id++;
                    fused_layer_names.push_back(last_layer);
                }
                void setIdentity(int bottom_index)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams identity_param;
                    identity_param.name = "Identity-name";
                    identity_param.type = "Identity";
                    darknet::LayerParameter lp;
                    std::string layer_name = cv::format("identity_%d", layer_id);
                    lp.layer_name = layer_name;
                    lp.layer_type = identity_param.type;
                    lp.layerParams = identity_param;
                    lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index));
                    last_layer = layer_name;
                    net->layers.push_back(lp);
                    layer_id++;
                    fused_layer_names.push_back(last_layer);
                }
                void setReorg(int stride)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams reorg_params;
                    reorg_params.name = "Reorg-name";
                    reorg_params.type = "Reorg";
                    reorg_params.set<int>("reorg_stride", stride);
                    darknet::LayerParameter lp;
                    std::string layer_name = cv::format("reorg_%d", layer_id);
                    lp.layer_name = layer_name;
                    lp.layer_type = reorg_params.type;
                    lp.layerParams = reorg_params;
                    lp.bottom_indexes.push_back(last_layer);
                    last_layer = layer_name;
                    net->layers.push_back(lp);
                    layer_id++;
                    fused_layer_names.push_back(last_layer);
                }
                void setPermute()
                {
                    cv::dnn::experimental_dnn_v1::LayerParams permute_params;
                    permute_params.name = "Permute-name";
                    permute_params.type = "Permute";
                    int permute[] = { 0, 2, 3, 1 };
                    cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4);
                    permute_params.set("order", paramOrder);
                    darknet::LayerParameter lp;
                    std::string layer_name = cv::format("premute_%d", layer_id);
                    lp.layer_name = layer_name;
                    lp.layer_type = permute_params.type;
                    lp.layerParams = permute_params;
                    lp.bottom_indexes.push_back(last_layer);
                    last_layer = layer_name;
                    net->layers.push_back(lp);
                    layer_id++;
                    fused_layer_names.push_back(last_layer);
                }
                void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData)
                {
                    cv::dnn::experimental_dnn_v1::LayerParams region_param;
                    region_param.name = "Region-name";
                    region_param.type = "Region";
                    region_param.set<float>("thresh", thresh);
                    region_param.set<int>("coords", coords);
                    region_param.set<int>("classes", classes);
                    region_param.set<int>("anchors", anchors);
                    region_param.set<int>("classfix", classfix);
                    region_param.set<bool>("softmax_tree", softmax_tree);
                    region_param.set<bool>("softmax", softmax);
                    cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone();
                    region_param.blobs.push_back(biasData_mat);
                    darknet::LayerParameter lp;
                    std::string layer_name = "detection_out";
                    lp.layer_name = layer_name;
                    lp.layer_type = region_param.type;
                    lp.layerParams = region_param;
                    lp.bottom_indexes.push_back(last_layer);
                    last_layer = layer_name;
                    net->layers.push_back(lp);
                    layer_id++;
                    fused_layer_names.push_back(last_layer);
                }
            };
            // Returns a copy of src without whitespace and control characters: only characters
            // greater than ' ' and not greater than 'z' are kept, in their original order.
            std::string escapeString(const std::string &src)
            {
                std::string kept;
                for (std::string::const_iterator ch = src.begin(); ch != src.end(); ++ch)
                {
                    const bool printable = (*ch > ' ') && (*ch <= 'z');
                    if (printable)
                        kept.push_back(*ch);
                }
                return kept;
            }
            /** @brief Splits a comma-separated list and parses every item as T.
             *
             * @param src  text such as "-1,-4" or "1.08,1.19,3.42".
             * @returns one element per comma-separated token, in order; an empty string yields
             *          an empty vector. A token that is empty or cannot be parsed yields T().
             */
            template<typename T>
            std::vector<T> getNumbers(const std::string &src)
            {
                std::vector<T> dst;
                std::stringstream ss(src);
                for (std::string str; std::getline(ss, str, ',');) {
                    std::stringstream line(str);
                    // Value-initialize: extraction from an empty token leaves the target untouched,
                    // so without this an indeterminate value would be pushed.
                    T val = T();
                    line >> val;
                    dst.push_back(val);
                }
                return dst;
            }
            bool ReadDarknetFromCfgFile(const char *cfgFile, NetParameter *net)
            {
                std::ifstream ifile;
                ifile.open(cfgFile);
                if (ifile.is_open())
                {
                    bool read_net = false;
                    int layers_counter = -1;
                    for (std::string line; std::getline(ifile, line);) {
                        line = escapeString(line);
                        if (line.empty()) continue;
                        switch (line[0]) {
                        case '\0': break;
                        case '#': break;
                        case ';': break;
                        case '[':
                            if (line == "[net]") {
                                read_net = true;
                            }
                            else {
                                // read section
                                read_net = false;
                                ++layers_counter;
                                const size_t layer_type_size = line.find("]") - 1;
                                CV_Assert(layer_type_size < line.size());
                                std::string layer_type = line.substr(1, layer_type_size);
                                net->layers_cfg[layers_counter]["type"] = layer_type;
                            }
                            break;
                        default:
                            // read entry
                            const size_t separator_index = line.find('=');
                            CV_Assert(separator_index < line.size());
                            if (separator_index != std::string::npos) {
                                std::string name = line.substr(0, separator_index);
                                std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1));
                                name = escapeString(name);
                                value = escapeString(value);
                                if (name.empty() || value.empty()) continue;
                                if (read_net)
                                    net->net_cfg[name] = value;
                                else
                                    net->layers_cfg[layers_counter][name] = value;
                            }
                        }
                    }
                    std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"];
                    std::vector<float> vec = getNumbers<float>(anchors);
                    std::map<std::string, std::string> &net_params = net->net_cfg;
                    net->width = getParam(net_params, "width", 416);
                    net->height = getParam(net_params, "height", 416);
                    net->channels = getParam(net_params, "channels", 3);
                    CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0);
                }
                else
                    return false;
                int current_channels = net->channels;
                net->out_channels_vec.resize(net->layers_cfg.size());
                int layers_counter = -1;
                setLayersParams setParams(net);
                typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
                for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
                    ++layers_counter;
                    std::map<std::string, std::string> &layer_params = i->second;
                    std::string layer_type = layer_params["type"];
                    if (layer_type == "convolutional")
                    {
                        int kernel_size = getParam<int>(layer_params, "size", -1);
                        int pad = getParam<int>(layer_params, "pad", 0);
                        int stride = getParam<int>(layer_params, "stride", 1);
                        int filters = getParam<int>(layer_params, "filters", -1);
                        std::string activation = getParam<std::string>(layer_params, "activation", "linear");
                        bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
                        if(activation != "linear" && activation != "leaky")
                            CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
                        int flipped = getParam<int>(layer_params, "flipped", 0);
                        if (flipped == 1)
                            CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented");
                        // correct the strange value of pad=1 for kernel_size=1 in the Darknet cfg-file
                        if (kernel_size < 3) pad = 0;
                        CV_Assert(kernel_size > 0 && filters > 0);
                        CV_Assert(current_channels > 0);
                        setParams.setConvolution(kernel_size, pad, stride, filters, current_channels,
                            batch_normalize, activation == "leaky");
                        current_channels = filters;
                    }
                    else if (layer_type == "maxpool")
                    {
                        int kernel_size = getParam<int>(layer_params, "size", 2);
                        int stride = getParam<int>(layer_params, "stride", 2);
                        int pad = getParam<int>(layer_params, "pad", 0);
                        setParams.setMaxpool(kernel_size, pad, stride);
                    }
                    else if (layer_type == "route")
                    {
                        std::string bottom_layers = getParam<std::string>(layer_params, "layers", "");
                        CV_Assert(!bottom_layers.empty());
                        std::vector<int> layers_vec = getNumbers<int>(bottom_layers);
                        current_channels = 0;
                        for (size_t k = 0; k < layers_vec.size(); ++k) {
                            layers_vec[k] += layers_counter;
                            current_channels += net->out_channels_vec[layers_vec[k]];
                        }
                        if (layers_vec.size() == 1)
                            setParams.setIdentity(layers_vec.at(0));
                        else
                            setParams.setConcat(layers_vec.size(), layers_vec.data());
                    }
                    else if (layer_type == "reorg")
                    {
                        int stride = getParam<int>(layer_params, "stride", 2);
                        current_channels = current_channels * (stride*stride);
                        setParams.setReorg(stride);
                    }
                    else if (layer_type == "region")
                    {
                        float thresh = 0.001;    // in the original Darknet is equal to the detection threshold set by the user
                        int coords = getParam<int>(layer_params, "coords", 4);
                        int classes = getParam<int>(layer_params, "classes", -1);
                        int num_of_anchors = getParam<int>(layer_params, "num", -1);
                        int classfix = getParam<int>(layer_params, "classfix", 0);
                        bool softmax = (getParam<int>(layer_params, "softmax", 0) == 1);
                        bool softmax_tree = (getParam<std::string>(layer_params, "tree", "").size() > 0);
                        std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
                        CV_Assert(!anchors_values.empty());
                        std::vector<float> anchors_vec = getNumbers<float>(anchors_values);
                        CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
                        setParams.setPermute();
                        setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data());
                    }
                    else {
                        CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
                    }
                    net->out_channels_vec[layers_counter] = current_channels;
                }
                return true;
            }
            // Loads the learned parameters of every convolutional layer from a binary
            // Darknet .weights file and attaches them to the layers described by `net`.
            //
            // darknetModel - path of the .weights file; a file that cannot be opened trips CV_Assert.
            // net          - network description; its layers_cfg, channels and out_channels_vec
            //                are read here, and the blobs are stored through setLayersParams.
            //
            // Returns true when every value could be read, and false when the file ends before
            // the header or the parameters of some layer are complete (previously such a file
            // was accepted and the missing values were left as uninitialised memory).
            // Raises StsNotImplemented when the version fields request transposed weights.
            bool ReadDarknetFromWeightsFile(const char *darknetModel, NetParameter *net)
            {
                std::ifstream ifile;
                ifile.open(darknetModel, std::ios::binary);
                CV_Assert(ifile.is_open());

                // Header: three 32-bit version fields. They are zero-initialised so that a
                // short read can never leave them indeterminate.
                int32_t major_ver = 0, minor_ver = 0, revision = 0;
                ifile.read(reinterpret_cast<char *>(&major_ver), sizeof(int32_t));
                ifile.read(reinterpret_cast<char *>(&minor_ver), sizeof(int32_t));
                ifile.read(reinterpret_cast<char *>(&revision), sizeof(int32_t));
                if (ifile.fail())
                    return false;

                // The `seen` field is 64 bits wide when major_ver*10 + minor_ver >= 2 and
                // 32 bits wide otherwise. Its value is not used; it is read only to advance
                // the stream to the first layer.
                uint64_t seen = 0;
                if ((major_ver * 10 + minor_ver) >= 2) {
                    ifile.read(reinterpret_cast<char *>(&seen), sizeof(uint64_t));
                }
                else {
                    int32_t iseen = 0;
                    ifile.read(reinterpret_cast<char *>(&iseen), sizeof(int32_t));
                    seen = iseen;
                }
                (void)seen;
                if (ifile.fail())
                    return false;

                bool transpose = (major_ver > 1000) || (minor_ver > 1000);
                if(transpose)
                    CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented");

                int current_channels = net->channels;
                // cv_layers_counter indexes the generated OpenCV layers, darknet_layers_counter
                // the cfg sections; one cfg section may expand into several OpenCV layers.
                int cv_layers_counter = -1;
                int darknet_layers_counter = -1;
                setLayersParams setParams(net);
                typedef std::map<int, std::map<std::string, std::string> >::iterator it_type;
                for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) {
                    ++darknet_layers_counter;
                    ++cv_layers_counter;
                    std::map<std::string, std::string> &layer_params = i->second;
                    std::string layer_type = layer_params["type"];
                    if (layer_type == "convolutional")
                    {
                        int kernel_size = getParam<int>(layer_params, "size", -1);
                        int filters = getParam<int>(layer_params, "filters", -1);
                        std::string activation = getParam<std::string>(layer_params, "activation", "linear");
                        bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
                        CV_Assert(kernel_size > 0 && filters > 0);
                        CV_Assert(current_channels > 0);

                        // Multiply in size_t so that large layers cannot overflow int.
                        size_t const weights_size = static_cast<size_t>(filters) * current_channels * kernel_size * kernel_size;
                        int sizes_weights[] = { filters, current_channels, kernel_size, kernel_size };
                        cv::Mat weightsBlob;
                        weightsBlob.create(4, sizes_weights, CV_32F);
                        CV_Assert(weightsBlob.isContinuous());
                        cv::Mat meanData_mat(1, filters, CV_32F);	// mean
                        cv::Mat stdData_mat(1, filters, CV_32F);	// variance
                        cv::Mat weightsData_mat(1, filters, CV_32F);// scale
                        cv::Mat biasData_mat(1, filters, CV_32F);	// bias

                        // Order in the file: bias, then (with batch normalization only)
                        // scale, mean and variance, then the convolution kernels.
                        ifile.read(reinterpret_cast<char *>(biasData_mat.ptr<float>()), sizeof(float)*filters);
                        if (use_batch_normalize) {
                            ifile.read(reinterpret_cast<char *>(weightsData_mat.ptr<float>()), sizeof(float)*filters);
                            ifile.read(reinterpret_cast<char *>(meanData_mat.ptr<float>()), sizeof(float)*filters);
                            ifile.read(reinterpret_cast<char *>(stdData_mat.ptr<float>()), sizeof(float)*filters);
                        }
                        ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size);
                        // A short read sets failbit: the file does not hold this layer completely.
                        if (ifile.fail())
                            return false;

                        // set convolutional weights
                        std::vector<cv::Mat> conv_blobs;
                        conv_blobs.push_back(weightsBlob);
                        if (!use_batch_normalize) {
                            // without batch normalization the bias belongs to the convolution itself
                            conv_blobs.push_back(biasData_mat);
                        }
                        setParams.setLayerBlobs(cv_layers_counter, conv_blobs);

                        // set batch normalize (mean, variance, scale, bias)
                        if (use_batch_normalize) {
                            ++cv_layers_counter;
                            std::vector<cv::Mat> bn_blobs;
                            bn_blobs.push_back(meanData_mat);
                            bn_blobs.push_back(stdData_mat);
                            bn_blobs.push_back(weightsData_mat);
                            bn_blobs.push_back(biasData_mat);
                            setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
                        }

                        // a leaky activation occupies one more OpenCV layer slot
                        if(activation == "leaky")
                            ++cv_layers_counter;
                    }
                    current_channels = net->out_channels_vec[darknet_layers_counter];
                }
                return true;
            }
        }
        // Parses the Darknet .cfg file `cfgFile` into `net`.
        // Raises cv::Error::StsParseError when the parser reports failure.
        void ReadNetParamsFromCfgFileOrDie(const char *cfgFile, darknet::NetParameter *net)
        {
            const bool parsed = darknet::ReadDarknetFromCfgFile(cfgFile, net);
            if (parsed)
                return;
            CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter file: " + std::string(cfgFile));
        }
        // Loads the Darknet .weights file `darknetModel` into `net`.
        // Raises cv::Error::StsParseError when the loader reports failure.
        void ReadNetParamsFromBinaryFileOrDie(const char *darknetModel, darknet::NetParameter *net)
        {
            const bool loaded = darknet::ReadDarknetFromWeightsFile(darknetModel, net);
            if (loaded)
                return;
            CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter file: " + std::string(darknetModel));
        }
    }
 }
--- a/modules/dnn/src/darknet/darknet_io.hpp
+++ b/modules/dnn/src/darknet/darknet_io.hpp
@ -0,0 +1,116 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //                        (3-clause BSD License)
 //
 // Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 // * Redistributions of source code must retain the above copyright notice,
 // this list of conditions and the following disclaimer.
 //
 // * Redistributions in binary form must reproduce the above copyright notice,
 // this list of conditions and the following disclaimer in the documentation
 // and/or other materials provided with the distribution.
 //
 // * Neither the names of the copyright holders nor the names of the contributors
 // may be used to endorse or promote products derived from this software
 // without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall copyright holders or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //MIT License
 //
 //Copyright (c) 2017 Joseph Redmon
 //
 //Permission is hereby granted, free of charge, to any person obtaining a copy
 //of this software and associated documentation files (the "Software"), to deal
 //in the Software without restriction, including without limitation the rights
 //to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 //copies of the Software, and to permit persons to whom the Software is
 //furnished to do so, subject to the following conditions:
 //
 //The above copyright notice and this permission notice shall be included in all
 //copies or substantial portions of the Software.
 //
 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 //FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 //AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 //LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 //OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 //SOFTWARE.
 //
 //M*/
 #ifndef __OPENCV_DNN_DARKNET_IO_HPP__
 #define __OPENCV_DNN_DARKNET_IO_HPP__
 #include <opencv2/dnn/dnn.hpp>
 namespace cv {
    namespace dnn {
        namespace darknet {
            // Importer-side description of one network layer: its name, its type, the
            // names of the layers it takes input from, and the LayerParams passed on to
            // cv::dnn. The data members are private; setLayersParams is granted friend
            // access to them.
            class LayerParameter {
                std::string layer_name;
                std::string layer_type;
                std::vector<std::string> bottom_indexes;
                cv::dnn::experimental_dnn_v1::LayerParams layerParams;
            public:
                friend class setLayersParams;

                // Returns a copy of the stored cv::dnn layer parameters.
                cv::dnn::experimental_dnn_v1::LayerParams getLayerParams() const
                {
                    return layerParams;
                }

                // Layer name and layer type, returned by value.
                std::string name() const
                {
                    return layer_name;
                }
                std::string type() const
                {
                    return layer_type;
                }

                // Number of inputs and the name of the index-th input;
                // an out-of-range index throws std::out_of_range.
                int bottom_size() const
                {
                    return static_cast<int>(bottom_indexes.size());
                }
                std::string bottom(const int index) const
                {
                    return bottom_indexes.at(index);
                }

                // A layer always reports exactly one output, named after the layer itself,
                // so the index argument is ignored.
                int top_size() const
                {
                    return 1;
                }
                std::string top(const int index) const
                {
                    (void)index;
                    return layer_name;
                }
            };
            // Whole-network description produced by the Darknet importer.
            class NetParameter {
            public:
                // `channels` is used by the weights loader as the channel count feeding the
                // first layer. width/height are presumably the network input size and are
                // filled in by the cfg parser (not visible here) - TODO confirm.
                int width, height, channels;
                // Generated layers, in order.
                std::vector<LayerParameter> layers;
                // Output channel count of each cfg layer, indexed by the cfg layer counter.
                std::vector<int> out_channels_vec;
                // Key/value parameters of each cfg layer section (including the "type" key),
                // keyed by an integer index.
                std::map<int, std::map<std::string, std::string> > layers_cfg;
                // Key/value parameters of the network itself (presumably the [net] section).
                std::map<std::string, std::string> net_cfg;

                // Zero the dimensions so that a default-constructed object never exposes
                // indeterminate values.
                NetParameter() : width(0), height(0), channels(0) {}

                // Number of generated layers.
                int layer_size() const { return static_cast<int>(layers.size()); }
                // The network has exactly one input, always named "data"; index is ignored.
                int input_size() const { return 1; }
                std::string input(const int index) const { (void)index; return "data"; }
                // Copy of the index-th layer; an out-of-range index throws std::out_of_range.
                LayerParameter layer(const int index) const { return layers.at(index); }
            };
        }
        // Read parameters from a file into a NetParameter message.
        // Both functions raise cv::Error::StsParseError when the underlying reader
        // reports failure.
        //   cfgFile      - path of the Darknet .cfg file (network architecture).
        //   darknetModel - path of the Darknet .weights file (learned parameters).
        //   net          - description that receives the data.
        void ReadNetParamsFromCfgFileOrDie(const char *cfgFile, darknet::NetParameter *net);
        void ReadNetParamsFromBinaryFileOrDie(const char *darknetModel, darknet::NetParameter *net);
    }
 }
 #endif
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@ -114,6 +114,8 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(Eltwise,        EltwiseLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Permute,        PermuteLayer);
    CV_DNN_REGISTER_LAYER_CLASS(PriorBox,       PriorBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Reorg,          ReorgLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Region,         RegionLayer);
    CV_DNN_REGISTER_LAYER_CLASS(DetectionOutput, DetectionOutputLayer);
    CV_DNN_REGISTER_LAYER_CLASS(NormalizeBBox,  NormalizeBBoxLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Normalize,      NormalizeBBoxLayer);
--- a/modules/dnn/src/layers/region_layer.cpp
+++ b/modules/dnn/src/layers/region_layer.cpp
@ -0,0 +1,331 @@
 /*M ///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 // Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 //   * Redistribution's of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #include "../precomp.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/dnn/all_layers.hpp>
 #include <iostream>
 namespace cv
 {
 namespace dnn
 {
 class RegionLayerImpl : public RegionLayer
 {
 public:
    int coords, classes, anchors, classfix;
    float thresh, nmsThreshold;
    bool useSoftmaxTree, useSoftmax;
    RegionLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        CV_Assert(blobs.size() == 1);
        thresh = params.get<float>("thresh", 0.2);
        coords = params.get<int>("coords", 4);
        classes = params.get<int>("classes", 0);
        anchors = params.get<int>("anchors", 5);
        classfix = params.get<int>("classfix", 0);
        useSoftmaxTree = params.get<bool>("softmax_tree", false);
        useSoftmax = params.get<bool>("softmax", false);
        nmsThreshold = params.get<float>("nms_threshold", 0.4);
        CV_Assert(nmsThreshold >= 0.);
        CV_Assert(coords == 4);
        CV_Assert(classes >= 1);
        CV_Assert(anchors >= 1);
        CV_Assert(useSoftmaxTree || useSoftmax);
    }
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const
    {
        CV_Assert(inputs.size() > 0);
        CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
        outputs = std::vector<MatShape>(inputs.size(), shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
        return false;
    }
    virtual bool supportBackend(int backendId)
    {
        return backendId == DNN_BACKEND_DEFAULT;
    }
    float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }
    void softmax_activate(const float* input, const int n, const float temp, float* output)
    {
        int i;
        float sum = 0;
        float largest = -FLT_MAX;
        for (i = 0; i < n; ++i) {
            if (input[i] > largest) largest = input[i];
        }
        for (i = 0; i < n; ++i) {
            float e = exp((input[i] - largest) / temp);
            sum += e;
            output[i] = e;
        }
        for (i = 0; i < n; ++i) {
            output[i] /= sum;
        }
    }
    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
        CV_Assert(inputs.size() >= 1);
        int const cell_size = classes + coords + 1;
        const float* biasData = blobs[0].ptr<float>();
        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            Mat &inpBlob = *inputs[ii];
            Mat &outBlob = outputs[ii];
            int rows = inpBlob.size[1];
            int cols = inpBlob.size[2];
            const float *srcData = inpBlob.ptr<float>();
            float *dstData = outBlob.ptr<float>();
            // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
            for (int i = 0; i < rows*cols*anchors; ++i) {
                int index = cell_size*i;
                float x = srcData[index + 4];
                dstData[index + 4] = logistic_activate(x);	// logistic activation
            }
            if (useSoftmaxTree) {   // Yolo 9000
                CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
            }
            else if (useSoftmax) {  // Yolo v2
                // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
                for (int i = 0; i < rows*cols*anchors; ++i) {
                    int index = cell_size*i;
                    softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
                }
                for (int x = 0; x < cols; ++x)
                    for(int y = 0; y < rows; ++y)
                        for (int a = 0; a < anchors; ++a) {
                            int index = (y*cols + x)*anchors + a;	// index for each grid-cell & anchor
                            int p_index = index * cell_size + 4;
                            float scale = dstData[p_index];
                            if (classfix == -1 && scale < .5) scale = 0;	// if(t0 < 0.5) t0 = 0;
                            int box_index = index * cell_size;
                            dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
                            dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
                            dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / cols;
                            dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / rows;
                            int class_index = index * cell_size + 5;
                            if (useSoftmaxTree) {
                                CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
                            }
                            else {
                                for (int j = 0; j < classes; ++j) {
                                    float prob = scale*dstData[class_index + j];	// prob = IoU(box, object) = t0 * class-probability
                                    dstData[class_index + j] = (prob > thresh) ? prob : 0;		// if (IoU < threshold) IoU = 0;
                                }
                            }
                        }
            }
            if (nmsThreshold > 0) {
                do_nms_sort(dstData, rows*cols*anchors, nmsThreshold);
                //do_nms(dstData, rows*cols*anchors, nmsThreshold);
            }
        }
    }
    struct box {
        float x, y, w, h;
        float *probs;
    };
    float overlap(float x1, float w1, float x2, float w2)
    {
        float l1 = x1 - w1 / 2;
        float l2 = x2 - w2 / 2;
        float left = l1 > l2 ? l1 : l2;
        float r1 = x1 + w1 / 2;
        float r2 = x2 + w2 / 2;
        float right = r1 < r2 ? r1 : r2;
        return right - left;
    }
    float box_intersection(box a, box b)
    {
        float w = overlap(a.x, a.w, b.x, b.w);
        float h = overlap(a.y, a.h, b.y, b.h);
        if (w < 0 || h < 0) return 0;
        float area = w*h;
        return area;
    }
    float box_union(box a, box b)
    {
        float i = box_intersection(a, b);
        float u = a.w*a.h + b.w*b.h - i;
        return u;
    }
    float box_iou(box a, box b)
    {
        return box_intersection(a, b) / box_union(a, b);
    }
    struct sortable_bbox {
        int index;
        float *probs;
    };
    struct nms_comparator {
        int k;
        nms_comparator(int _k) : k(_k) {}
        bool operator ()(sortable_bbox v1, sortable_bbox v2) {
            return v2.probs[k] < v1.probs[k];
        }
    };
    void do_nms_sort(float *detections, int total, float nms_thresh)
    {
        std::vector<box> boxes(total);
        for (int i = 0; i < total; ++i) {
            box &b = boxes[i];
            int box_index = i * (classes + coords + 1);
            b.x = detections[box_index + 0];
            b.y = detections[box_index + 1];
            b.w = detections[box_index + 2];
            b.h = detections[box_index + 3];
            int class_index = i * (classes + 5) + 5;
            b.probs = (detections + class_index);
        }
        std::vector<sortable_bbox> s(total);
        for (int i = 0; i < total; ++i) {
            s[i].index = i;
            int class_index = i * (classes + 5) + 5;
            s[i].probs = (detections + class_index);
        }
        for (int k = 0; k < classes; ++k) {
            std::stable_sort(s.begin(), s.end(), nms_comparator(k));
            for (int i = 0; i < total; ++i) {
                if (boxes[s[i].index].probs[k] == 0) continue;
                box a = boxes[s[i].index];
                for (int j = i + 1; j < total; ++j) {
                    box b = boxes[s[j].index];
                    if (box_iou(a, b) > nms_thresh) {
                        boxes[s[j].index].probs[k] = 0;
                    }
                }
            }
        }
    }
    void do_nms(float *detections, int total, float nms_thresh)
    {
        std::vector<box> boxes(total);
        for (int i = 0; i < total; ++i) {
            box &b = boxes[i];
            int box_index = i * (classes + coords + 1);
            b.x = detections[box_index + 0];
            b.y = detections[box_index + 1];
            b.w = detections[box_index + 2];
            b.h = detections[box_index + 3];
            int class_index = i * (classes + 5) + 5;
            b.probs = (detections + class_index);
        }
        for (int i = 0; i < total; ++i) {
            bool any = false;
            for (int k = 0; k < classes; ++k) any = any || (boxes[i].probs[k] > 0);
            if (!any) {
                continue;
            }
            for (int j = i + 1; j < total; ++j) {
                if (box_iou(boxes[i], boxes[j]) > nms_thresh) {
                    for (int k = 0; k < classes; ++k) {
                        if (boxes[i].probs[k] < boxes[j].probs[k]) boxes[i].probs[k] = 0;
                        else boxes[j].probs[k] = 0;
                    }
                }
            }
        }
    }
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const
    {
        (void)outputs; // suppress unused variable warning
        int64 flops = 0;
        for(int i = 0; i < inputs.size(); i++)
        {
            flops += 60*total(inputs[i]);
        }
        return flops;
    }
 };
 Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
 {
    return Ptr<RegionLayer>(new RegionLayerImpl(params));
 }
 }  // namespace dnn
 }  // namespace cv
--- a/modules/dnn/src/layers/reorg_layer.cpp
+++ b/modules/dnn/src/layers/reorg_layer.cpp
@ -0,0 +1,140 @@
 /*M ///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 // Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 //   * Redistribution's of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #include "../precomp.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/dnn/all_layers.hpp>
 #include <iostream>
 namespace cv
 {
 namespace dnn
 {
 // "Reorg" layer of a Darknet network: rearranges the elements of an
 // N x C x H x W blob into N x (C*s*s) x (H/s) x (W/s), where s is the
 // "reorg_stride" parameter (default 2). The element count is unchanged.
 class ReorgLayerImpl : public ReorgLayer
 {
    int reorgStride;
 public:
    // Reads "reorg_stride" from `params`; the stride must be positive.
    ReorgLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        reorgStride = params.get<int>("reorg_stride", 2);
        CV_Assert(reorgStride > 0);
    }

    // All outputs get the shape derived from the first input. The asserts reject
    // inputs whose height or width is not a multiple of the stride, because the
    // element count would then change.
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const
    {
        CV_Assert(inputs.size() > 0);
        outputs = std::vector<MatShape>(inputs.size(), shape(
            inputs[0][0],
            inputs[0][1] * reorgStride * reorgStride,
            inputs[0][2] / reorgStride,
            inputs[0][3] / reorgStride));
        CV_Assert(outputs[0][0] > 0 && outputs[0][1] > 0 && outputs[0][2] > 0 && outputs[0][3] > 0);
        CV_Assert(total(outputs[0]) == total(inputs[0]));
        return false;
    }

    // Only the default backend is supported.
    virtual bool supportBackend(int backendId)
    {
        return backendId == DNN_BACKEND_DEFAULT;
    }

    // Rearranges every sample of every input into the matching output.
    // Each input is written to its own output (outputs[n], not always outputs[0]),
    // and all samples of a batch are processed, not only the first one.
    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
        for (size_t n = 0; n < inputs.size(); n++)
        {
            const Mat &srcBlob = *inputs[n];
            MatShape inputShape = shape(srcBlob);
            float *dstData = outputs[n].ptr<float>();
            const float *srcData = srcBlob.ptr<float>();

            const int batch = inputShape[0];
            const int channels = inputShape[1], height = inputShape[2], width = inputShape[3];
            const size_t sample_size = static_cast<size_t>(channels) * height * width;

            const int out_c = channels / (reorgStride*reorgStride);
            // fewer channels than stride*stride would lead to a division by zero below
            CV_Assert(out_c > 0);

            for (int b = 0; b < batch; ++b) {
                // input and output samples hold the same number of elements
                float *dstSample = dstData + b * sample_size;
                const float *srcSample = srcData + b * sample_size;
                for (int k = 0; k < channels; ++k) {
                    const int c2 = k % out_c;
                    const int offset = k / out_c;
                    for (int j = 0; j < height; ++j) {
                        const int h2 = j*reorgStride + offset / reorgStride;
                        for (int i = 0; i < width; ++i) {
                            const int out_index = i + width*(j + height*k);
                            const int w2 = i*reorgStride + offset % reorgStride;
                            const int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2);
                            dstSample[out_index] = srcSample[in_index];
                        }
                    }
                }
            }
        }
    }

    // Rough cost estimate: 21 operations per input element.
    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const
    {
        (void)outputs; // suppress unused variable warning
        int64 flops = 0;
        for(size_t i = 0; i < inputs.size(); i++)
        {
            flops += 21*total(inputs[i]);
        }
        return flops;
    }
 };
 Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)
 {
    return Ptr<ReorgLayer>(new ReorgLayerImpl(params));
 }
 }  // namespace dnn
 }  // namespace cv
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@ -0,0 +1,186 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //                        (3-clause BSD License)
 //
 // Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 // * Redistributions of source code must retain the above copyright notice,
 // this list of conditions and the following disclaimer.
 //
 // * Redistributions in binary form must reproduce the above copyright notice,
 // this list of conditions and the following disclaimer in the documentation
 // and/or other materials provided with the distribution.
 //
 // * Neither the names of the copyright holders nor the names of the contributors
 // may be used to endorse or promote products derived from this software
 // without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall copyright holders or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #include "test_precomp.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 #include <algorithm>
 namespace cvtest
 {
 using namespace cv;
 using namespace cv::dnn;
 // Builds the path of a dnn test file: "<OpenCV extra dir>/dnn/<filename>".
 template<typename TString>
 static std::string _tf(TString filename)
 {
    const std::string dnnDataDir = getOpenCVExtraDir() + "/dnn/";
    return dnnDataDir + filename;
 }
 // The tiny-yolo-voc configuration alone (no weights file is passed) must
 // produce a non-empty network.
 TEST(Test_Darknet, read_tiny_yolo_voc)
 {
    const std::string cfgPath = _tf("tiny-yolo-voc.cfg");
    Net net = readNetFromDarknet(cfgPath);
    ASSERT_FALSE(net.empty());
 }
 // The yolo-voc configuration alone (no weights file is passed) must produce
 // a non-empty network.
 TEST(Test_Darknet, read_yolo_voc)
 {
    const std::string cfgPath = _tf("yolo-voc.cfg");
    Net net = readNetFromDarknet(cfgPath);
    ASSERT_FALSE(net.empty());
 }
 // Runs tiny-yolo-voc on dog416.png and compares the rows whose best class score
 // exceeds the threshold against reference values obtained from Darknet.
 TEST(Reproducibility_TinyYoloVoc, Accuracy)
 {
    const string cfgPath = findDataFile("dnn/tiny-yolo-voc.cfg", false);
    const string weightsPath = findDataFile("dnn/tiny-yolo-voc.weights", false);
    Net net = readNetFromDarknet(cfgPath, weightsPath);
    ASSERT_FALSE(net.empty());

    // dog416.png is dog.jpg resized to 416x416 and stored in the lossless PNG format
    Mat sample = imread(_tf("dog416.png"));
    ASSERT_TRUE(!sample.empty());

    const Size inputSize(416, 416);
    if (sample.size() != inputSize)
        resize(sample, sample, inputSize);

    Mat inputBlob = blobFromImage(sample, 1 / 255.F);
    net.setInput(inputBlob, "data");
    Mat out = net.forward("detection_out");

    // Keep only the rows whose highest class score is above the threshold.
    const float confidenceThreshold = 0.24;
    const int firstClassCol = 5;   // columns [5, cols) hold the per-class scores
    Mat detection;
    for (int row = 0; row < out.rows; ++row) {
        const int classCount = out.cols - firstClassCol;
        float *classScores = &out.at<float>(row, firstClassCol);
        const float bestScore = *std::max_element(classScores, classScores + classCount);
        if (bestScore > confidenceThreshold)
            detection.push_back(out.row(row));
    }

    // obtained by: ./darknet detector test ./cfg/voc.data  ./cfg/tiny-yolo-voc.cfg ./tiny-yolo-voc.weights -thresh 0.24 ./dog416.png
    // There are 2 objects (6-car, 11-dog) with 25 values for each:
    // { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
    float ref_array[] = {
        // object 0: box + unused_t0
        0.736762F, 0.239551F, 0.315440F, 0.160779F, 0.761977F,
        // object 0: classes 0..9
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.761967F, 0.000000F, 0.000000F, 0.000000F,
        // object 0: classes 10..19
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

        // object 1: box + unused_t0
        0.287486F, 0.653731F, 0.315579F, 0.534527F, 0.782737F,
        // object 1: classes 0..9
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        // object 1: classes 10..19
        0.000000F, 0.780595F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
    };

    const int number_of_objects = 2;
    const int valuesPerObject = sizeof(ref_array) / (number_of_objects * sizeof(float));
    Mat ref(number_of_objects, valuesPerObject, CV_32FC1, ref_array);
    normAssert(ref, detection);
 }
 // Runs yolo-voc on dog416.png and compares the rows whose best class score
 // exceeds the threshold against reference values obtained from Darknet.
 TEST(Reproducibility_YoloVoc, Accuracy)
 {
    const string cfgPath = findDataFile("dnn/yolo-voc.cfg", false);
    const string weightsPath = findDataFile("dnn/yolo-voc.weights", false);
    Net net = readNetFromDarknet(cfgPath, weightsPath);
    ASSERT_FALSE(net.empty());

    // dog416.png is dog.jpg resized to 416x416 and stored in the lossless PNG format
    Mat sample = imread(_tf("dog416.png"));
    ASSERT_TRUE(!sample.empty());

    const Size inputSize(416, 416);
    if (sample.size() != inputSize)
        resize(sample, sample, inputSize);

    Mat inputBlob = blobFromImage(sample, 1 / 255.F);
    net.setInput(inputBlob, "data");
    Mat out = net.forward("detection_out");

    // Keep only the rows whose highest class score is above the threshold.
    const float confidenceThreshold = 0.24;
    const int firstClassCol = 5;   // columns [5, cols) hold the per-class scores
    Mat detection;
    for (int row = 0; row < out.rows; ++row) {
        const int classCount = out.cols - firstClassCol;
        float *classScores = &out.at<float>(row, firstClassCol);
        const float bestScore = *std::max_element(classScores, classScores + classCount);
        if (bestScore > confidenceThreshold)
            detection.push_back(out.row(row));
    }

    // obtained by: ./darknet detector test ./cfg/voc.data  ./cfg/yolo-voc.cfg ./yolo-voc.weights -thresh 0.24 ./dog416.png
    // There are 3 objects (6-car, 1-bicycle, 11-dog) with 25 values for each:
    // { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
    float ref_array[] = {
        // object 0: box + unused_t0
        0.740161F, 0.214100F, 0.325575F, 0.173418F, 0.750769F,
        // object 0: classes 0..9
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.750469F, 0.000000F, 0.000000F, 0.000000F,
        // object 0: classes 10..19
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

        // object 1: box + unused_t0
        0.501618F, 0.504757F, 0.461713F, 0.481310F, 0.783550F,
        // object 1: classes 0..9
        0.000000F, 0.780879F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        // object 1: classes 10..19
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

        // object 2: box + unused_t0
        0.279968F, 0.638651F, 0.282737F, 0.600284F, 0.901864F,
        // object 2: classes 0..9
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        // object 2: classes 10..19
        0.000000F, 0.901615F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
    };

    const int number_of_objects = 3;
    const int valuesPerObject = sizeof(ref_array) / (number_of_objects * sizeof(float));
    Mat ref(number_of_objects, valuesPerObject, CV_32FC1, ref_array);
    normAssert(ref, detection);
 }
 }
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@ -10,7 +10,7 @@
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@ -485,4 +485,36 @@ TEST_F(Layer_RNN_Test, get_set_test)
    EXPECT_EQ(shape(outputs[1]), shape(nT, nS, nH));
 }
 // Loads "<basename>.cfg" (plus "<basename>.weights" when useDarknetModel is set),
 // feeds it either the shared "blob.npy" or "<basename>.input.npy", and compares
 // the network output with the reference stored in "<basename>.npy".
 void testLayerUsingDarknetModels(String basename, bool useDarknetModel = false, bool useCommonInputBlob = true)
 {
    String cfgPath = _tf(basename + ".cfg");
    String weightsPath = _tf(basename + ".weights");

    String inputPath;
    if (useCommonInputBlob)
        inputPath = _tf("blob.npy");
    else
        inputPath = _tf(basename + ".input.npy");

    String referencePath = _tf(basename + ".npy");

    cv::setNumThreads(cv::getNumberOfCPUs());

    // An empty weights path is passed when only the configuration is wanted.
    String modelArg;
    if (useDarknetModel)
        modelArg = weightsPath;

    Net net = readNetFromDarknet(cfgPath, modelArg);
    ASSERT_FALSE(net.empty());

    Mat inputBlob = blobFromNPY(inputPath);
    Mat expected = blobFromNPY(referencePath);

    net.setInput(inputBlob, "data");
    Mat actual = net.forward();

    normAssert(expected, actual);
 }
 // Region layer: configuration only (no weights), layer-specific input blob.
 TEST(Layer_Test_Region, Accuracy)
 {
    const bool useDarknetModel = false;
    const bool useCommonInputBlob = false;
    testLayerUsingDarknetModels("region", useDarknetModel, useCommonInputBlob);
 }
 // Reorg layer: configuration only (no weights), layer-specific input blob.
 TEST(Layer_Test_Reorg, Accuracy)
 {
    const bool useDarknetModel = false;
    const bool useCommonInputBlob = false;
    testLayerUsingDarknetModels("reorg", useDarknetModel, useCommonInputBlob);
 }
 }
--- a/samples/dnn/yolo_object_detection.cpp
+++ b/samples/dnn/yolo_object_detection.cpp
@ -0,0 +1,117 @@
 #include <opencv2/dnn.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/imgproc.hpp>
 #include <opencv2/highgui.hpp>
 using namespace cv;
 using namespace cv::dnn;
 #include <fstream>
 #include <iostream>
 #include <algorithm>
 #include <cstdlib>
 using namespace std;
 // Size the input image is resized to before it is turned into the network blob.
 const size_t network_width = 416;
 const size_t network_height = 416;
 // Text printed for --help. The pieces are concatenated by the compiler, so each
 // piece must carry its own separating space.
 const char* about = "This sample uses You only look once (YOLO)-Detector "
                     "(https://arxiv.org/abs/1612.08242) "
                     "to detect objects on image\n"; // TODO: link
 // Command line keys in cv::CommandLineParser format: { name | default | description }.
 const char* params
     = "{ help           | false | print usage         }"
       "{ cfg            |       | model configuration }"
       "{ model          |       | model weights       }"
       "{ image          |       | image for detection }"
       "{ min_confidence | 0.24  | min confidence      }";
 int main(int argc, char** argv)
 {
    cv::CommandLineParser parser(argc, argv, params);
    if (parser.get<bool>("help"))
    {
        std::cout << about << std::endl;
        parser.printMessage();
        return 0;
    }
    String modelConfiguration = parser.get<string>("cfg");
    String modelBinary = parser.get<string>("model");
    //! [Initialize network]
    dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary);
    //! [Initialize network]
    if (net.empty())
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "cfg-file:     " << modelConfiguration << endl;
        cerr << "weights-file: " << modelBinary << endl;
        cerr << "Models can be downloaded here:" << endl;
        cerr << "https://pjreddie.com/darknet/yolo/" << endl;
        exit(-1);
    }
    cv::Mat frame = cv::imread(parser.get<string>("image"));
    //! [Resizing without keeping aspect ratio]
    cv::Mat resized;
    cv::resize(frame, resized, cv::Size(network_width, network_height));
    //! [Resizing without keeping aspect ratio]
    //! [Prepare blob]
    Mat inputBlob = blobFromImage(resized, 1 / 255.F); //Convert Mat to batch of images
    //! [Prepare blob]
    //! [Set input blob]
    net.setInput(inputBlob, "data");                //set the network input
    //! [Set input blob]
    //! [Make forward pass]
    cv::Mat detectionMat = net.forward("detection_out");	//compute output
   //! [Make forward pass]
    float confidenceThreshold = parser.get<float>("min_confidence");
    for (int i = 0; i < detectionMat.rows; i++)
    {
        const int probability_index = 5;
        const int probability_size = detectionMat.cols - probability_index;
        float *prob_array_ptr = &detectionMat.at<float>(i, probability_index);
        size_t objectClass = std::max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
        float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index);
        if (confidence > confidenceThreshold)
        {
            float x = detectionMat.at<float>(i, 0);
            float y = detectionMat.at<float>(i, 1);
            float width = detectionMat.at<float>(i, 2);
            float height = detectionMat.at<float>(i, 3);
            float xLeftBottom = (x - width / 2) * frame.cols;
            float yLeftBottom = (y - height / 2) * frame.rows;
            float xRightTop = (x + width / 2) * frame.cols;
            float yRightTop = (y + height / 2) * frame.rows;
            std::cout << "Class: " << objectClass << std::endl;
            std::cout << "Confidence: " << confidence << std::endl;
            std::cout << " " << xLeftBottom
                << " " << yLeftBottom
                << " " << xRightTop
                << " " << yRightTop << std::endl;
            Rect object((int)xLeftBottom, (int)yLeftBottom,
                (int)(xRightTop - xLeftBottom),
                (int)(yRightTop - yLeftBottom));
            rectangle(frame, object, Scalar(0, 255, 0));
        }
    }
    imshow("detections", frame);
    waitKey();
    return 0;
 } // main