mirror of https://github.com/opencv/opencv.git
Merge pull request #9705 from AlexeyAB:dnn_darknet_yolo_v2
commit
b7ff9ddcdd
11 changed files with 1764 additions and 1 deletions
@ -0,0 +1,195 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
// (3-clause BSD License)
|
||||||
|
//
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * Neither the names of the copyright holders nor the names of the contributors
|
||||||
|
// may be used to endorse or promote products derived from this software
|
||||||
|
// without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall copyright holders or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "../precomp.hpp" |
||||||
|
|
||||||
|
#include <iostream> |
||||||
|
#include <algorithm> |
||||||
|
#include <vector> |
||||||
|
#include <map> |
||||||
|
|
||||||
|
#include "darknet_io.hpp" |
||||||
|
|
||||||
|
|
||||||
|
namespace cv { |
||||||
|
namespace dnn { |
||||||
|
CV__DNN_EXPERIMENTAL_NS_BEGIN |
||||||
|
|
||||||
|
namespace |
||||||
|
{ |
||||||
|
|
||||||
|
class DarknetImporter : public Importer |
||||||
|
{ |
||||||
|
darknet::NetParameter net; |
||||||
|
|
||||||
|
public: |
||||||
|
|
||||||
|
DarknetImporter() {} |
||||||
|
|
||||||
|
DarknetImporter(const char *cfgFile, const char *darknetModel) |
||||||
|
{ |
||||||
|
CV_TRACE_FUNCTION(); |
||||||
|
|
||||||
|
ReadNetParamsFromCfgFileOrDie(cfgFile, &net); |
||||||
|
|
||||||
|
if (darknetModel && darknetModel[0]) |
||||||
|
ReadNetParamsFromBinaryFileOrDie(darknetModel, &net); |
||||||
|
} |
||||||
|
|
||||||
|
struct BlobNote |
||||||
|
{ |
||||||
|
BlobNote(const std::string &_name, int _layerId, int _outNum) : |
||||||
|
name(_name), layerId(_layerId), outNum(_outNum) {} |
||||||
|
|
||||||
|
std::string name; |
||||||
|
int layerId, outNum; |
||||||
|
}; |
||||||
|
|
||||||
|
std::vector<BlobNote> addedBlobs; |
||||||
|
std::map<String, int> layerCounter; |
||||||
|
|
||||||
|
void populateNet(Net dstNet) |
||||||
|
{ |
||||||
|
CV_TRACE_FUNCTION(); |
||||||
|
|
||||||
|
int layersSize = net.layer_size(); |
||||||
|
layerCounter.clear(); |
||||||
|
addedBlobs.clear(); |
||||||
|
addedBlobs.reserve(layersSize + 1); |
||||||
|
|
||||||
|
//setup input layer names
|
||||||
|
{ |
||||||
|
std::vector<String> netInputs(net.input_size()); |
||||||
|
for (int inNum = 0; inNum < net.input_size(); inNum++) |
||||||
|
{ |
||||||
|
addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum)); |
||||||
|
netInputs[inNum] = net.input(inNum); |
||||||
|
} |
||||||
|
dstNet.setInputsNames(netInputs); |
||||||
|
} |
||||||
|
|
||||||
|
for (int li = 0; li < layersSize; li++) |
||||||
|
{ |
||||||
|
const darknet::LayerParameter &layer = net.layer(li); |
||||||
|
String name = layer.name(); |
||||||
|
String type = layer.type(); |
||||||
|
LayerParams layerParams = layer.getLayerParams(); |
||||||
|
|
||||||
|
int repetitions = layerCounter[name]++; |
||||||
|
if (repetitions) |
||||||
|
name += cv::format("_%d", repetitions); |
||||||
|
|
||||||
|
int id = dstNet.addLayer(name, type, layerParams); |
||||||
|
|
||||||
|
// iterate many bottoms layers (for example for: route -1, -4)
|
||||||
|
for (int inNum = 0; inNum < layer.bottom_size(); inNum++) |
||||||
|
addInput(layer.bottom(inNum), id, inNum, dstNet, layer.name()); |
||||||
|
|
||||||
|
for (int outNum = 0; outNum < layer.top_size(); outNum++) |
||||||
|
addOutput(layer, id, outNum); |
||||||
|
} |
||||||
|
|
||||||
|
addedBlobs.clear(); |
||||||
|
} |
||||||
|
|
||||||
|
void addOutput(const darknet::LayerParameter &layer, int layerId, int outNum) |
||||||
|
{ |
||||||
|
const std::string &name = layer.top(outNum); |
||||||
|
|
||||||
|
bool haveDups = false; |
||||||
|
for (int idx = (int)addedBlobs.size() - 1; idx >= 0; idx--) |
||||||
|
{ |
||||||
|
if (addedBlobs[idx].name == name) |
||||||
|
{ |
||||||
|
haveDups = true; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (haveDups) |
||||||
|
{ |
||||||
|
bool isInplace = layer.bottom_size() > outNum && layer.bottom(outNum) == name; |
||||||
|
if (!isInplace) |
||||||
|
CV_Error(Error::StsBadArg, "Duplicate blobs produced by multiple sources"); |
||||||
|
} |
||||||
|
|
||||||
|
addedBlobs.push_back(BlobNote(name, layerId, outNum)); |
||||||
|
} |
||||||
|
|
||||||
|
void addInput(const std::string &name, int layerId, int inNum, Net &dstNet, std::string nn) |
||||||
|
{ |
||||||
|
int idx; |
||||||
|
for (idx = (int)addedBlobs.size() - 1; idx >= 0; idx--) |
||||||
|
{ |
||||||
|
if (addedBlobs[idx].name == name) |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (idx < 0) |
||||||
|
{ |
||||||
|
CV_Error(Error::StsObjectNotFound, "Can't find output blob \"" + name + "\""); |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
dstNet.connect(addedBlobs[idx].layerId, addedBlobs[idx].outNum, layerId, inNum); |
||||||
|
} |
||||||
|
|
||||||
|
~DarknetImporter() |
||||||
|
{ |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
}; |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// Builds a Net from a Darknet cfg-file (topology) and a weights file.
// The DarknetImporter constructor skips the weights when darknetModel is empty.
Net readNetFromDarknet(const String &cfgFile, const String &darknetModel /*= String()*/)
{
    DarknetImporter importer(cfgFile.c_str(), darknetModel.c_str());

    Net result;
    importer.populateNet(result);
    return result;
}
||||||
|
|
||||||
|
CV__DNN_EXPERIMENTAL_NS_END |
||||||
|
}} // namespace
|
@ -0,0 +1,624 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
// (3-clause BSD License)
|
||||||
|
//
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * Neither the names of the copyright holders nor the names of the contributors
|
||||||
|
// may be used to endorse or promote products derived from this software
|
||||||
|
// without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall copyright holders or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//MIT License
|
||||||
|
//
|
||||||
|
//Copyright (c) 2017 Joseph Redmon
|
||||||
|
//
|
||||||
|
//Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
//of this software and associated documentation files (the "Software"), to deal
|
||||||
|
//in the Software without restriction, including without limitation the rights
|
||||||
|
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
//copies of the Software, and to permit persons to whom the Software is
|
||||||
|
//furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
//The above copyright notice and this permission notice shall be included in all
|
||||||
|
//copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
//SOFTWARE.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include <opencv2/core.hpp> |
||||||
|
|
||||||
|
#include <iostream> |
||||||
|
#include <fstream> |
||||||
|
#include <sstream> |
||||||
|
|
||||||
|
#include "darknet_io.hpp" |
||||||
|
|
||||||
|
namespace cv { |
||||||
|
namespace dnn { |
||||||
|
namespace darknet { |
||||||
|
|
||||||
|
// Looks up `param_name` in `params` and parses the stored text as T.
//
// Returns init_val when the key is absent or when the text cannot be parsed
// as T; otherwise returns the parsed value.
template<typename T>
T getParam(const std::map<std::string, std::string> &params, const std::string param_name, T init_val)
{
    std::map<std::string, std::string>::const_iterator it = params.find(param_name);
    if (it == params.end())
        return init_val;

    // Parse into a temporary: since C++11 a failed numeric extraction stores 0
    // in its target, which would silently replace the caller's default.
    T parsed = init_val;
    std::stringstream ss(it->second);
    if (ss >> parsed)
        return parsed;

    return init_val;
}
||||||
|
|
||||||
|
class setLayersParams { |
||||||
|
|
||||||
|
NetParameter *net; |
||||||
|
int layer_id; |
||||||
|
std::string last_layer; |
||||||
|
std::vector<std::string> fused_layer_names; |
||||||
|
|
||||||
|
public: |
||||||
|
setLayersParams(NetParameter *_net, std::string _first_layer = "data") : |
||||||
|
net(_net), layer_id(0), last_layer(_first_layer) |
||||||
|
{} |
||||||
|
|
||||||
|
void setLayerBlobs(int i, std::vector<cv::Mat> blobs) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams ¶ms = net->layers[i].layerParams; |
||||||
|
params.blobs = blobs; |
||||||
|
} |
||||||
|
|
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams getParamConvolution(int kernel, int pad, |
||||||
|
int stride, int filters_num) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams params; |
||||||
|
params.name = "Convolution-name"; |
||||||
|
params.type = "Convolution"; |
||||||
|
|
||||||
|
params.set<int>("kernel_size", kernel); |
||||||
|
params.set<int>("pad", pad); |
||||||
|
params.set<int>("stride", stride); |
||||||
|
|
||||||
|
params.set<bool>("bias_term", false); // true only if(BatchNorm == false)
|
||||||
|
params.set<int>("num_output", filters_num); |
||||||
|
|
||||||
|
return params; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
void setConvolution(int kernel, int pad, int stride, |
||||||
|
int filters_num, int channels_num, int use_batch_normalize, int use_relu) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams conv_param = |
||||||
|
getParamConvolution(kernel, pad, stride, filters_num); |
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
std::string layer_name = cv::format("conv_%d", layer_id); |
||||||
|
|
||||||
|
// use BIAS in any case
|
||||||
|
if (!use_batch_normalize) { |
||||||
|
conv_param.set<bool>("bias_term", true); |
||||||
|
} |
||||||
|
|
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = conv_param.type; |
||||||
|
lp.layerParams = conv_param; |
||||||
|
lp.bottom_indexes.push_back(last_layer); |
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
|
||||||
|
if (use_batch_normalize) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams bn_param; |
||||||
|
|
||||||
|
bn_param.name = "BatchNorm-name"; |
||||||
|
bn_param.type = "BatchNorm"; |
||||||
|
bn_param.set<bool>("has_weight", true); |
||||||
|
bn_param.set<bool>("has_bias", true); |
||||||
|
bn_param.set<float>("eps", 1E-6); // .000001f in Darknet Yolo
|
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
std::string layer_name = cv::format("bn_%d", layer_id); |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = bn_param.type; |
||||||
|
lp.layerParams = bn_param; |
||||||
|
lp.bottom_indexes.push_back(last_layer); |
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
} |
||||||
|
|
||||||
|
if (use_relu) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams activation_param; |
||||||
|
activation_param.set<float>("negative_slope", 0.1f); |
||||||
|
activation_param.name = "ReLU-name"; |
||||||
|
activation_param.type = "ReLU"; |
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
std::string layer_name = cv::format("relu_%d", layer_id); |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = activation_param.type; |
||||||
|
lp.layerParams = activation_param; |
||||||
|
lp.bottom_indexes.push_back(last_layer); |
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
} |
||||||
|
|
||||||
|
layer_id++; |
||||||
|
fused_layer_names.push_back(last_layer); |
||||||
|
} |
||||||
|
|
||||||
|
void setMaxpool(size_t kernel, size_t pad, size_t stride) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams maxpool_param; |
||||||
|
maxpool_param.set<cv::String>("pool", "max"); |
||||||
|
maxpool_param.set<int>("kernel_size", kernel); |
||||||
|
maxpool_param.set<int>("pad", pad); |
||||||
|
maxpool_param.set<int>("stride", stride); |
||||||
|
maxpool_param.set<cv::String>("pad_mode", "SAME"); |
||||||
|
maxpool_param.name = "Pooling-name"; |
||||||
|
maxpool_param.type = "Pooling"; |
||||||
|
darknet::LayerParameter lp; |
||||||
|
|
||||||
|
std::string layer_name = cv::format("pool_%d", layer_id); |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = maxpool_param.type; |
||||||
|
lp.layerParams = maxpool_param; |
||||||
|
lp.bottom_indexes.push_back(last_layer); |
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
layer_id++; |
||||||
|
fused_layer_names.push_back(last_layer); |
||||||
|
} |
||||||
|
|
||||||
|
void setConcat(int number_of_inputs, int *input_indexes) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams concat_param; |
||||||
|
concat_param.name = "Concat-name"; |
||||||
|
concat_param.type = "Concat"; |
||||||
|
concat_param.set<int>("axis", 1); // channels are in axis = 1
|
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
|
||||||
|
std::string layer_name = cv::format("concat_%d", layer_id); |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = concat_param.type; |
||||||
|
lp.layerParams = concat_param; |
||||||
|
for (int i = 0; i < number_of_inputs; ++i) |
||||||
|
lp.bottom_indexes.push_back(fused_layer_names.at(input_indexes[i])); |
||||||
|
|
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
|
||||||
|
layer_id++; |
||||||
|
fused_layer_names.push_back(last_layer); |
||||||
|
} |
||||||
|
|
||||||
|
void setIdentity(int bottom_index) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams identity_param; |
||||||
|
identity_param.name = "Identity-name"; |
||||||
|
identity_param.type = "Identity"; |
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
|
||||||
|
std::string layer_name = cv::format("identity_%d", layer_id); |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = identity_param.type; |
||||||
|
lp.layerParams = identity_param; |
||||||
|
lp.bottom_indexes.push_back(fused_layer_names.at(bottom_index)); |
||||||
|
|
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
|
||||||
|
layer_id++; |
||||||
|
fused_layer_names.push_back(last_layer); |
||||||
|
} |
||||||
|
|
||||||
|
void setReorg(int stride) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams reorg_params; |
||||||
|
reorg_params.name = "Reorg-name"; |
||||||
|
reorg_params.type = "Reorg"; |
||||||
|
reorg_params.set<int>("reorg_stride", stride); |
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
std::string layer_name = cv::format("reorg_%d", layer_id); |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = reorg_params.type; |
||||||
|
lp.layerParams = reorg_params; |
||||||
|
lp.bottom_indexes.push_back(last_layer); |
||||||
|
last_layer = layer_name; |
||||||
|
|
||||||
|
net->layers.push_back(lp); |
||||||
|
|
||||||
|
layer_id++; |
||||||
|
fused_layer_names.push_back(last_layer); |
||||||
|
} |
||||||
|
|
||||||
|
void setPermute() |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams permute_params; |
||||||
|
permute_params.name = "Permute-name"; |
||||||
|
permute_params.type = "Permute"; |
||||||
|
int permute[] = { 0, 2, 3, 1 }; |
||||||
|
cv::dnn::DictValue paramOrder = cv::dnn::DictValue::arrayInt(permute, 4); |
||||||
|
|
||||||
|
permute_params.set("order", paramOrder); |
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
std::string layer_name = cv::format("premute_%d", layer_id); |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = permute_params.type; |
||||||
|
lp.layerParams = permute_params; |
||||||
|
lp.bottom_indexes.push_back(last_layer); |
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
|
||||||
|
layer_id++; |
||||||
|
fused_layer_names.push_back(last_layer); |
||||||
|
} |
||||||
|
|
||||||
|
void setRegion(float thresh, int coords, int classes, int anchors, int classfix, int softmax, int softmax_tree, float *biasData) |
||||||
|
{ |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams region_param; |
||||||
|
region_param.name = "Region-name"; |
||||||
|
region_param.type = "Region"; |
||||||
|
|
||||||
|
region_param.set<float>("thresh", thresh); |
||||||
|
region_param.set<int>("coords", coords); |
||||||
|
region_param.set<int>("classes", classes); |
||||||
|
region_param.set<int>("anchors", anchors); |
||||||
|
region_param.set<int>("classfix", classfix); |
||||||
|
region_param.set<bool>("softmax_tree", softmax_tree); |
||||||
|
region_param.set<bool>("softmax", softmax); |
||||||
|
|
||||||
|
cv::Mat biasData_mat = cv::Mat(1, anchors * 2, CV_32F, biasData).clone(); |
||||||
|
region_param.blobs.push_back(biasData_mat); |
||||||
|
|
||||||
|
darknet::LayerParameter lp; |
||||||
|
std::string layer_name = "detection_out"; |
||||||
|
lp.layer_name = layer_name; |
||||||
|
lp.layer_type = region_param.type; |
||||||
|
lp.layerParams = region_param; |
||||||
|
lp.bottom_indexes.push_back(last_layer); |
||||||
|
last_layer = layer_name; |
||||||
|
net->layers.push_back(lp); |
||||||
|
|
||||||
|
layer_id++; |
||||||
|
fused_layer_names.push_back(last_layer); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
// Returns a copy of src that keeps only the characters strictly greater than
// ' ' and not greater than 'z'; every other character is dropped.
std::string escapeString(const std::string &src)
{
    std::string kept;
    for (std::string::const_iterator ch = src.begin(); ch != src.end(); ++ch)
    {
        const bool dropped = (*ch <= ' ') || (*ch > 'z');
        if (dropped)
            continue;
        kept.push_back(*ch);
    }
    return kept;
}
||||||
|
|
||||||
|
// Splits src on ',' and parses every field as T.
//
// Returns one element per field, in order. A field that cannot be parsed at
// all (for example an empty one) yields T() instead of an indeterminate value.
// An empty src yields an empty vector.
template<typename T>
std::vector<T> getNumbers(const std::string &src)
{
    std::vector<T> dst;
    std::stringstream ss(src);

    for (std::string str; std::getline(ss, str, ',');) {
        std::stringstream line(str);
        // value-initialize: extraction from an empty field leaves val untouched
        T val = T();
        line >> val;
        dst.push_back(val);
    }
    return dst;
}
||||||
|
|
||||||
|
bool ReadDarknetFromCfgFile(const char *cfgFile, NetParameter *net) |
||||||
|
{ |
||||||
|
std::ifstream ifile; |
||||||
|
ifile.open(cfgFile); |
||||||
|
if (ifile.is_open()) |
||||||
|
{ |
||||||
|
bool read_net = false; |
||||||
|
int layers_counter = -1; |
||||||
|
for (std::string line; std::getline(ifile, line);) { |
||||||
|
line = escapeString(line); |
||||||
|
if (line.empty()) continue; |
||||||
|
switch (line[0]) { |
||||||
|
case '\0': break; |
||||||
|
case '#': break; |
||||||
|
case ';': break; |
||||||
|
case '[': |
||||||
|
if (line == "[net]") { |
||||||
|
read_net = true; |
||||||
|
} |
||||||
|
else { |
||||||
|
// read section
|
||||||
|
read_net = false; |
||||||
|
++layers_counter; |
||||||
|
const size_t layer_type_size = line.find("]") - 1; |
||||||
|
CV_Assert(layer_type_size < line.size()); |
||||||
|
std::string layer_type = line.substr(1, layer_type_size); |
||||||
|
net->layers_cfg[layers_counter]["type"] = layer_type; |
||||||
|
} |
||||||
|
break; |
||||||
|
default: |
||||||
|
// read entry
|
||||||
|
const size_t separator_index = line.find('='); |
||||||
|
CV_Assert(separator_index < line.size()); |
||||||
|
if (separator_index != std::string::npos) { |
||||||
|
std::string name = line.substr(0, separator_index); |
||||||
|
std::string value = line.substr(separator_index + 1, line.size() - (separator_index + 1)); |
||||||
|
name = escapeString(name); |
||||||
|
value = escapeString(value); |
||||||
|
if (name.empty() || value.empty()) continue; |
||||||
|
if (read_net) |
||||||
|
net->net_cfg[name] = value; |
||||||
|
else |
||||||
|
net->layers_cfg[layers_counter][name] = value; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
std::string anchors = net->layers_cfg[net->layers_cfg.size() - 1]["anchors"]; |
||||||
|
std::vector<float> vec = getNumbers<float>(anchors); |
||||||
|
std::map<std::string, std::string> &net_params = net->net_cfg; |
||||||
|
net->width = getParam(net_params, "width", 416); |
||||||
|
net->height = getParam(net_params, "height", 416); |
||||||
|
net->channels = getParam(net_params, "channels", 3); |
||||||
|
CV_Assert(net->width > 0 && net->height > 0 && net->channels > 0); |
||||||
|
} |
||||||
|
else |
||||||
|
return false; |
||||||
|
|
||||||
|
int current_channels = net->channels; |
||||||
|
net->out_channels_vec.resize(net->layers_cfg.size()); |
||||||
|
|
||||||
|
int layers_counter = -1; |
||||||
|
|
||||||
|
setLayersParams setParams(net); |
||||||
|
|
||||||
|
typedef std::map<int, std::map<std::string, std::string> >::iterator it_type; |
||||||
|
for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) { |
||||||
|
++layers_counter; |
||||||
|
std::map<std::string, std::string> &layer_params = i->second; |
||||||
|
std::string layer_type = layer_params["type"]; |
||||||
|
|
||||||
|
if (layer_type == "convolutional") |
||||||
|
{ |
||||||
|
int kernel_size = getParam<int>(layer_params, "size", -1); |
||||||
|
int pad = getParam<int>(layer_params, "pad", 0); |
||||||
|
int stride = getParam<int>(layer_params, "stride", 1); |
||||||
|
int filters = getParam<int>(layer_params, "filters", -1); |
||||||
|
std::string activation = getParam<std::string>(layer_params, "activation", "linear"); |
||||||
|
bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1; |
||||||
|
if(activation != "linear" && activation != "leaky") |
||||||
|
CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation); |
||||||
|
int flipped = getParam<int>(layer_params, "flipped", 0); |
||||||
|
if (flipped == 1) |
||||||
|
CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented"); |
||||||
|
|
||||||
|
// correct the strange value of pad=1 for kernel_size=1 in the Darknet cfg-file
|
||||||
|
if (kernel_size < 3) pad = 0; |
||||||
|
|
||||||
|
CV_Assert(kernel_size > 0 && filters > 0); |
||||||
|
CV_Assert(current_channels > 0); |
||||||
|
|
||||||
|
setParams.setConvolution(kernel_size, pad, stride, filters, current_channels, |
||||||
|
batch_normalize, activation == "leaky"); |
||||||
|
|
||||||
|
current_channels = filters; |
||||||
|
} |
||||||
|
else if (layer_type == "maxpool") |
||||||
|
{ |
||||||
|
int kernel_size = getParam<int>(layer_params, "size", 2); |
||||||
|
int stride = getParam<int>(layer_params, "stride", 2); |
||||||
|
int pad = getParam<int>(layer_params, "pad", 0); |
||||||
|
setParams.setMaxpool(kernel_size, pad, stride); |
||||||
|
} |
||||||
|
else if (layer_type == "route") |
||||||
|
{ |
||||||
|
std::string bottom_layers = getParam<std::string>(layer_params, "layers", ""); |
||||||
|
CV_Assert(!bottom_layers.empty()); |
||||||
|
std::vector<int> layers_vec = getNumbers<int>(bottom_layers); |
||||||
|
|
||||||
|
current_channels = 0; |
||||||
|
for (size_t k = 0; k < layers_vec.size(); ++k) { |
||||||
|
layers_vec[k] += layers_counter; |
||||||
|
current_channels += net->out_channels_vec[layers_vec[k]]; |
||||||
|
} |
||||||
|
|
||||||
|
if (layers_vec.size() == 1) |
||||||
|
setParams.setIdentity(layers_vec.at(0)); |
||||||
|
else |
||||||
|
setParams.setConcat(layers_vec.size(), layers_vec.data()); |
||||||
|
} |
||||||
|
else if (layer_type == "reorg") |
||||||
|
{ |
||||||
|
int stride = getParam<int>(layer_params, "stride", 2); |
||||||
|
current_channels = current_channels * (stride*stride); |
||||||
|
|
||||||
|
setParams.setReorg(stride); |
||||||
|
} |
||||||
|
else if (layer_type == "region") |
||||||
|
{ |
||||||
|
float thresh = 0.001; // in the original Darknet is equal to the detection threshold set by the user
|
||||||
|
int coords = getParam<int>(layer_params, "coords", 4); |
||||||
|
int classes = getParam<int>(layer_params, "classes", -1); |
||||||
|
int num_of_anchors = getParam<int>(layer_params, "num", -1); |
||||||
|
int classfix = getParam<int>(layer_params, "classfix", 0); |
||||||
|
bool softmax = (getParam<int>(layer_params, "softmax", 0) == 1); |
||||||
|
bool softmax_tree = (getParam<std::string>(layer_params, "tree", "").size() > 0); |
||||||
|
|
||||||
|
std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string()); |
||||||
|
CV_Assert(!anchors_values.empty()); |
||||||
|
std::vector<float> anchors_vec = getNumbers<float>(anchors_values); |
||||||
|
|
||||||
|
CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size()); |
||||||
|
|
||||||
|
setParams.setPermute(); |
||||||
|
setParams.setRegion(thresh, coords, classes, num_of_anchors, classfix, softmax, softmax_tree, anchors_vec.data()); |
||||||
|
} |
||||||
|
else { |
||||||
|
CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type); |
||||||
|
} |
||||||
|
net->out_channels_vec[layers_counter] = current_channels; |
||||||
|
} |
||||||
|
|
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
bool ReadDarknetFromWeightsFile(const char *darknetModel, NetParameter *net) |
||||||
|
{ |
||||||
|
std::ifstream ifile; |
||||||
|
ifile.open(darknetModel, std::ios::binary); |
||||||
|
CV_Assert(ifile.is_open()); |
||||||
|
|
||||||
|
int32_t major_ver, minor_ver, revision; |
||||||
|
ifile.read(reinterpret_cast<char *>(&major_ver), sizeof(int32_t)); |
||||||
|
ifile.read(reinterpret_cast<char *>(&minor_ver), sizeof(int32_t)); |
||||||
|
ifile.read(reinterpret_cast<char *>(&revision), sizeof(int32_t)); |
||||||
|
|
||||||
|
uint64_t seen; |
||||||
|
if ((major_ver * 10 + minor_ver) >= 2) { |
||||||
|
ifile.read(reinterpret_cast<char *>(&seen), sizeof(uint64_t)); |
||||||
|
} |
||||||
|
else { |
||||||
|
int32_t iseen = 0; |
||||||
|
ifile.read(reinterpret_cast<char *>(&iseen), sizeof(int32_t)); |
||||||
|
seen = iseen; |
||||||
|
} |
||||||
|
bool transpose = (major_ver > 1000) || (minor_ver > 1000); |
||||||
|
if(transpose) |
||||||
|
CV_Error(cv::Error::StsNotImplemented, "Transpose the weights (except for convolutional) is not implemented"); |
||||||
|
|
||||||
|
int current_channels = net->channels; |
||||||
|
int cv_layers_counter = -1; |
||||||
|
int darknet_layers_counter = -1; |
||||||
|
|
||||||
|
setLayersParams setParams(net); |
||||||
|
|
||||||
|
typedef std::map<int, std::map<std::string, std::string> >::iterator it_type; |
||||||
|
for (it_type i = net->layers_cfg.begin(); i != net->layers_cfg.end(); ++i) { |
||||||
|
++darknet_layers_counter; |
||||||
|
++cv_layers_counter; |
||||||
|
std::map<std::string, std::string> &layer_params = i->second; |
||||||
|
std::string layer_type = layer_params["type"]; |
||||||
|
|
||||||
|
if (layer_type == "convolutional") |
||||||
|
{ |
||||||
|
int kernel_size = getParam<int>(layer_params, "size", -1); |
||||||
|
int filters = getParam<int>(layer_params, "filters", -1); |
||||||
|
std::string activation = getParam<std::string>(layer_params, "activation", "linear"); |
||||||
|
bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1; |
||||||
|
|
||||||
|
CV_Assert(kernel_size > 0 && filters > 0); |
||||||
|
CV_Assert(current_channels > 0); |
||||||
|
|
||||||
|
size_t const weights_size = filters * current_channels * kernel_size * kernel_size; |
||||||
|
int sizes_weights[] = { filters, current_channels, kernel_size, kernel_size }; |
||||||
|
cv::Mat weightsBlob; |
||||||
|
weightsBlob.create(4, sizes_weights, CV_32F); |
||||||
|
CV_Assert(weightsBlob.isContinuous()); |
||||||
|
|
||||||
|
cv::Mat meanData_mat(1, filters, CV_32F); // mean
|
||||||
|
cv::Mat stdData_mat(1, filters, CV_32F); // variance
|
||||||
|
cv::Mat weightsData_mat(1, filters, CV_32F);// scale
|
||||||
|
cv::Mat biasData_mat(1, filters, CV_32F); // bias
|
||||||
|
|
||||||
|
ifile.read(reinterpret_cast<char *>(biasData_mat.ptr<float>()), sizeof(float)*filters); |
||||||
|
if (use_batch_normalize) { |
||||||
|
ifile.read(reinterpret_cast<char *>(weightsData_mat.ptr<float>()), sizeof(float)*filters); |
||||||
|
ifile.read(reinterpret_cast<char *>(meanData_mat.ptr<float>()), sizeof(float)*filters); |
||||||
|
ifile.read(reinterpret_cast<char *>(stdData_mat.ptr<float>()), sizeof(float)*filters); |
||||||
|
} |
||||||
|
ifile.read(reinterpret_cast<char *>(weightsBlob.ptr<float>()), sizeof(float)*weights_size); |
||||||
|
|
||||||
|
// set convolutional weights
|
||||||
|
std::vector<cv::Mat> conv_blobs; |
||||||
|
conv_blobs.push_back(weightsBlob); |
||||||
|
if (!use_batch_normalize) { |
||||||
|
// use BIAS in any case
|
||||||
|
conv_blobs.push_back(biasData_mat); |
||||||
|
} |
||||||
|
setParams.setLayerBlobs(cv_layers_counter, conv_blobs); |
||||||
|
|
||||||
|
// set batch normalize (mean, variance, scale, bias)
|
||||||
|
if (use_batch_normalize) { |
||||||
|
++cv_layers_counter; |
||||||
|
std::vector<cv::Mat> bn_blobs; |
||||||
|
bn_blobs.push_back(meanData_mat); |
||||||
|
bn_blobs.push_back(stdData_mat); |
||||||
|
bn_blobs.push_back(weightsData_mat); |
||||||
|
bn_blobs.push_back(biasData_mat); |
||||||
|
setParams.setLayerBlobs(cv_layers_counter, bn_blobs); |
||||||
|
} |
||||||
|
|
||||||
|
if(activation == "leaky") |
||||||
|
++cv_layers_counter; |
||||||
|
} |
||||||
|
current_channels = net->out_channels_vec[darknet_layers_counter]; |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
void ReadNetParamsFromCfgFileOrDie(const char *cfgFile, darknet::NetParameter *net) |
||||||
|
{ |
||||||
|
if (!darknet::ReadDarknetFromCfgFile(cfgFile, net)) { |
||||||
|
CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter file: " + std::string(cfgFile)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void ReadNetParamsFromBinaryFileOrDie(const char *darknetModel, darknet::NetParameter *net) |
||||||
|
{ |
||||||
|
if (!darknet::ReadDarknetFromWeightsFile(darknetModel, net)) { |
||||||
|
CV_Error(cv::Error::StsParseError, "Failed to parse NetParameter file: " + std::string(darknetModel)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
} |
||||||
|
} |
@ -0,0 +1,116 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
// (3-clause BSD License)
|
||||||
|
//
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * Neither the names of the copyright holders nor the names of the contributors
|
||||||
|
// may be used to endorse or promote products derived from this software
|
||||||
|
// without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall copyright holders or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//MIT License
|
||||||
|
//
|
||||||
|
//Copyright (c) 2017 Joseph Redmon
|
||||||
|
//
|
||||||
|
//Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
//of this software and associated documentation files (the "Software"), to deal
|
||||||
|
//in the Software without restriction, including without limitation the rights
|
||||||
|
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
//copies of the Software, and to permit persons to whom the Software is
|
||||||
|
//furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
//The above copyright notice and this permission notice shall be included in all
|
||||||
|
//copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
//SOFTWARE.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef __OPENCV_DNN_DARKNET_IO_HPP__ |
||||||
|
#define __OPENCV_DNN_DARKNET_IO_HPP__ |
||||||
|
|
||||||
|
#include <opencv2/dnn/dnn.hpp> |
||||||
|
|
||||||
|
namespace cv { |
||||||
|
namespace dnn { |
||||||
|
namespace darknet { |
||||||
|
|
||||||
|
class LayerParameter { |
||||||
|
std::string layer_name, layer_type; |
||||||
|
std::vector<std::string> bottom_indexes; |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams layerParams; |
||||||
|
public: |
||||||
|
friend class setLayersParams; |
||||||
|
cv::dnn::experimental_dnn_v1::LayerParams getLayerParams() const { return layerParams; } |
||||||
|
std::string name() const { return layer_name; } |
||||||
|
std::string type() const { return layer_type; } |
||||||
|
int bottom_size() const { return bottom_indexes.size(); } |
||||||
|
std::string bottom(const int index) const { return bottom_indexes.at(index); } |
||||||
|
int top_size() const { return 1; } |
||||||
|
std::string top(const int index) const { return layer_name; } |
||||||
|
}; |
||||||
|
|
||||||
|
class NetParameter { |
||||||
|
public: |
||||||
|
int width, height, channels; |
||||||
|
std::vector<LayerParameter> layers; |
||||||
|
std::vector<int> out_channels_vec; |
||||||
|
|
||||||
|
std::map<int, std::map<std::string, std::string> > layers_cfg; |
||||||
|
std::map<std::string, std::string> net_cfg; |
||||||
|
|
||||||
|
int layer_size() const { return layers.size(); } |
||||||
|
|
||||||
|
int input_size() const { return 1; } |
||||||
|
std::string input(const int index) const { return "data"; } |
||||||
|
LayerParameter layer(const int index) const { return layers.at(index); } |
||||||
|
}; |
||||||
|
} |
||||||
|
|
||||||
|
// Read parameters from a file into a NetParameter message.
|
||||||
|
void ReadNetParamsFromCfgFileOrDie(const char *cfgFile, darknet::NetParameter *net); |
||||||
|
void ReadNetParamsFromBinaryFileOrDie(const char *darknetModel, darknet::NetParameter *net); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
#endif |
@ -0,0 +1,331 @@ |
|||||||
|
/*M ///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "../precomp.hpp" |
||||||
|
#include <opencv2/dnn/shape_utils.hpp> |
||||||
|
#include <opencv2/dnn/all_layers.hpp> |
||||||
|
#include <iostream> |
||||||
|
|
||||||
|
namespace cv |
||||||
|
{ |
||||||
|
namespace dnn |
||||||
|
{ |
||||||
|
|
||||||
|
class RegionLayerImpl : public RegionLayer |
||||||
|
{ |
||||||
|
public: |
||||||
|
// Layer settings; all of them are read from LayerParams in the constructor.
int coords;          // box coordinates per cell record (asserted to be 4)
int classes;         // class scores per cell record
int anchors;         // anchor boxes per grid position
int classfix;        // -1: objectness below .5 is treated as 0 in forward()
float thresh;        // class scores not above this value are zeroed
float nmsThreshold;  // IoU limit for non-maximum suppression; 0 disables it
bool useSoftmaxTree; // Yolo 9000 mode (forward() reports it as not implemented)
bool useSoftmax;     // Yolo v2 mode
||||||
|
|
||||||
|
RegionLayerImpl(const LayerParams& params) |
||||||
|
{ |
||||||
|
setParamsFrom(params); |
||||||
|
CV_Assert(blobs.size() == 1); |
||||||
|
|
||||||
|
thresh = params.get<float>("thresh", 0.2); |
||||||
|
coords = params.get<int>("coords", 4); |
||||||
|
classes = params.get<int>("classes", 0); |
||||||
|
anchors = params.get<int>("anchors", 5); |
||||||
|
classfix = params.get<int>("classfix", 0); |
||||||
|
useSoftmaxTree = params.get<bool>("softmax_tree", false); |
||||||
|
useSoftmax = params.get<bool>("softmax", false); |
||||||
|
nmsThreshold = params.get<float>("nms_threshold", 0.4); |
||||||
|
|
||||||
|
CV_Assert(nmsThreshold >= 0.); |
||||||
|
CV_Assert(coords == 4); |
||||||
|
CV_Assert(classes >= 1); |
||||||
|
CV_Assert(anchors >= 1); |
||||||
|
CV_Assert(useSoftmaxTree || useSoftmax); |
||||||
|
} |
||||||
|
|
||||||
|
bool getMemoryShapes(const std::vector<MatShape> &inputs, |
||||||
|
const int requiredOutputs, |
||||||
|
std::vector<MatShape> &outputs, |
||||||
|
std::vector<MatShape> &internals) const |
||||||
|
{ |
||||||
|
CV_Assert(inputs.size() > 0); |
||||||
|
CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors); |
||||||
|
outputs = std::vector<MatShape>(inputs.size(), shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors)); |
||||||
|
return false; |
||||||
|
} |
||||||
|
|
||||||
|
virtual bool supportBackend(int backendId) |
||||||
|
{ |
||||||
|
return backendId == DNN_BACKEND_DEFAULT; |
||||||
|
} |
||||||
|
|
||||||
|
// Logistic (sigmoid) function: 1 / (1 + e^-x).
float logistic_activate(float x)
{
    return 1.F / (1.F + exp(-x));
}
||||||
|
|
||||||
|
// Softmax over input[0..n) with temperature temp, written to output[0..n).
// The largest input is subtracted before exponentiation so that the largest
// exponent is 0.
void softmax_activate(const float* input, const int n, const float temp, float* output)
{
    // pass 1: find the maximum
    float peak = -FLT_MAX;
    for (int idx = 0; idx < n; ++idx) {
        const float v = input[idx];
        if (v > peak)
            peak = v;
    }

    // pass 2: exponentiate the shifted values and accumulate their sum
    float denom = 0;
    for (int idx = 0; idx < n; ++idx) {
        const float e = exp((input[idx] - peak) / temp);
        output[idx] = e;
        denom += e;
    }

    // pass 3: normalise
    for (int idx = 0; idx < n; ++idx)
        output[idx] /= denom;
}
||||||
|
|
||||||
|
// Converts raw network output into detections, one input/output pair at a time.
//
// Each input is read as rows*cols*anchors consecutive records of
// (classes + coords + 1) floats, with rows = size[1] and cols = size[2]:
//   [0..3]  raw box values, [4] raw objectness, [5..] raw class scores.
// The matching output record receives:
//   [0..3]  box centre x/y and width/height divided by cols/rows,
//   [4]     logistic(objectness),
//   [5..]   objectness * softmax(class scores), zeroed when not above thresh.
// When nmsThreshold > 0 the records are then filtered by do_nms_sort().
// The softmax-tree (Yolo 9000) mode raises StsNotImplemented.
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    CV_Assert(inputs.size() >= 1);
    // inputs[ii] is indexed for every output below
    CV_Assert(inputs.size() >= outputs.size());
    int const cell_size = classes + coords + 1;

    const float* biasData = blobs[0].ptr<float>();

    for (size_t ii = 0; ii < outputs.size(); ii++)
    {
        // Fail before touching the output instead of after a partial write.
        if (useSoftmaxTree) {   // Yolo 9000
            CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
        }

        Mat &inpBlob = *inputs[ii];
        Mat &outBlob = outputs[ii];

        int rows = inpBlob.size[1];
        int cols = inpBlob.size[2];
        const int cells = rows*cols*anchors;

        const float *srcData = inpBlob.ptr<float>();
        float *dstData = outBlob.ptr<float>();

        // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
        for (int i = 0; i < cells; ++i) {
            int index = cell_size*i;
            float x = srcData[index + 4];
            dstData[index + 4] = logistic_activate(x);
        }

        if (useSoftmax) {  // Yolo v2
            // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
            for (int i = 0; i < cells; ++i) {
                int index = cell_size*i;
                softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
            }

            for (int x = 0; x < cols; ++x)
                for (int y = 0; y < rows; ++y)
                    for (int a = 0; a < anchors; ++a) {
                        int index = (y*cols + x)*anchors + a;   // index for each grid-cell & anchor
                        int p_index = index * cell_size + 4;
                        float scale = dstData[p_index];
                        if (classfix == -1 && scale < .5) scale = 0;  // if(t0 < 0.5) t0 = 0;
                        int box_index = index * cell_size;

                        // box centre relative to the grid, size scaled by the anchor pair
                        dstData[box_index + 0] = (x + logistic_activate(srcData[box_index + 0])) / cols;
                        dstData[box_index + 1] = (y + logistic_activate(srcData[box_index + 1])) / rows;
                        dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / cols;
                        dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / rows;

                        int class_index = index * cell_size + 5;
                        for (int j = 0; j < classes; ++j) {
                            float prob = scale*dstData[class_index + j];   // prob = IoU(box, object) = t0 * class-probability
                            dstData[class_index + j] = (prob > thresh) ? prob : 0;   // if (IoU < threshold) IoU = 0;
                        }
                    }
        }

        if (nmsThreshold > 0) {
            do_nms_sort(dstData, cells, nmsThreshold);
        }
    }
}
||||||
|
|
||||||
|
|
||||||
|
// A detection box plus a pointer to its per-class scores.
// overlap()/box_intersection() treat (x, y) as the box centre and (w, h) as
// its full width and height.
struct box {
    float x;
    float y;
    float w;
    float h;
    float *probs;   // not owned
};
||||||
|
|
||||||
|
// Length of the overlap of two 1-D segments given by centre (x) and full
// width (w). The result is negative when the segments do not touch.
float overlap(float x1, float w1, float x2, float w2)
{
    const float half1 = w1 / 2;
    const float half2 = w2 / 2;

    const float lo1 = x1 - half1, lo2 = x2 - half2;
    const float hi1 = x1 + half1, hi2 = x2 + half2;

    // left edge of the overlap: the larger of the two lower bounds
    float left = lo2;
    if (lo1 > lo2)
        left = lo1;

    // right edge of the overlap: the smaller of the two upper bounds
    float right = hi2;
    if (hi1 < hi2)
        right = hi1;

    return right - left;
}
||||||
|
|
||||||
|
// Area shared by boxes a and b; 0 when they are disjoint along either axis.
float box_intersection(box a, box b)
{
    const float w = overlap(a.x, a.w, b.x, b.w);
    const float h = overlap(a.y, a.h, b.y, b.h);

    const bool disjoint = (w < 0) || (h < 0);
    if (disjoint)
        return 0;

    return w*h;
}
||||||
|
|
||||||
|
// Area covered by a or b: both areas added, minus the shared part.
float box_union(box a, box b)
{
    const float shared = box_intersection(a, b);
    const float covered = a.w*a.h + b.w*b.h - shared;
    return covered;
}
||||||
|
|
||||||
|
// Intersection-over-union of boxes a and b.
// The intersection is computed once and reused for the union term. When the
// union is not positive (e.g. both boxes have zero area) 0 is returned
// instead of the NaN that 0/0 would produce.
float box_iou(box a, box b)
{
    const float inter = box_intersection(a, b);
    const float uni = a.w*a.h + b.w*b.h - inter;   // same expression as box_union()
    if (!(uni > 0))
        return 0;
    return inter / uni;
}
||||||
|
|
||||||
|
// Sort key used by do_nms_sort(): the position of a detection in the box
// list together with a pointer to that detection's class scores.
struct sortable_bbox {
    int index;      // index into the boxes vector
    float *probs;   // class scores of that box (not owned)
};
||||||
|
|
||||||
|
struct nms_comparator { |
||||||
|
int k; |
||||||
|
nms_comparator(int _k) : k(_k) {} |
||||||
|
bool operator ()(sortable_bbox v1, sortable_bbox v2) { |
||||||
|
return v2.probs[k] < v1.probs[k]; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
void do_nms_sort(float *detections, int total, float nms_thresh) |
||||||
|
{ |
||||||
|
std::vector<box> boxes(total); |
||||||
|
for (int i = 0; i < total; ++i) { |
||||||
|
box &b = boxes[i]; |
||||||
|
int box_index = i * (classes + coords + 1); |
||||||
|
b.x = detections[box_index + 0]; |
||||||
|
b.y = detections[box_index + 1]; |
||||||
|
b.w = detections[box_index + 2]; |
||||||
|
b.h = detections[box_index + 3]; |
||||||
|
int class_index = i * (classes + 5) + 5; |
||||||
|
b.probs = (detections + class_index); |
||||||
|
} |
||||||
|
|
||||||
|
std::vector<sortable_bbox> s(total); |
||||||
|
|
||||||
|
for (int i = 0; i < total; ++i) { |
||||||
|
s[i].index = i; |
||||||
|
int class_index = i * (classes + 5) + 5; |
||||||
|
s[i].probs = (detections + class_index); |
||||||
|
} |
||||||
|
|
||||||
|
for (int k = 0; k < classes; ++k) { |
||||||
|
std::stable_sort(s.begin(), s.end(), nms_comparator(k)); |
||||||
|
for (int i = 0; i < total; ++i) { |
||||||
|
if (boxes[s[i].index].probs[k] == 0) continue; |
||||||
|
box a = boxes[s[i].index]; |
||||||
|
for (int j = i + 1; j < total; ++j) { |
||||||
|
box b = boxes[s[j].index]; |
||||||
|
if (box_iou(a, b) > nms_thresh) { |
||||||
|
boxes[s[j].index].probs[k] = 0; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void do_nms(float *detections, int total, float nms_thresh) |
||||||
|
{ |
||||||
|
std::vector<box> boxes(total); |
||||||
|
for (int i = 0; i < total; ++i) { |
||||||
|
box &b = boxes[i]; |
||||||
|
int box_index = i * (classes + coords + 1); |
||||||
|
b.x = detections[box_index + 0]; |
||||||
|
b.y = detections[box_index + 1]; |
||||||
|
b.w = detections[box_index + 2]; |
||||||
|
b.h = detections[box_index + 3]; |
||||||
|
int class_index = i * (classes + 5) + 5; |
||||||
|
b.probs = (detections + class_index); |
||||||
|
} |
||||||
|
|
||||||
|
for (int i = 0; i < total; ++i) { |
||||||
|
bool any = false; |
||||||
|
for (int k = 0; k < classes; ++k) any = any || (boxes[i].probs[k] > 0); |
||||||
|
if (!any) { |
||||||
|
continue; |
||||||
|
} |
||||||
|
for (int j = i + 1; j < total; ++j) { |
||||||
|
if (box_iou(boxes[i], boxes[j]) > nms_thresh) { |
||||||
|
for (int k = 0; k < classes; ++k) { |
||||||
|
if (boxes[i].probs[k] < boxes[j].probs[k]) boxes[i].probs[k] = 0; |
||||||
|
else boxes[j].probs[k] = 0; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Cost estimate for the layer: 60 operations per input element, summed over
// all inputs.
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                       const std::vector<MatShape> &outputs) const
{
    (void)outputs; // suppress unused variable warning

    int64 flops = 0;
    // size_t index: avoids the signed/unsigned comparison against size()
    for (size_t i = 0; i < inputs.size(); i++)
    {
        // widen before multiplying so the product cannot overflow int
        flops += (int64)60 * total(inputs[i]);
    }
    return flops;
}
||||||
|
}; |
||||||
|
|
||||||
|
// Factory: builds a RegionLayerImpl from params and hands it back as a
// Ptr<RegionLayer>.
Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
{
    RegionLayerImpl *impl = new RegionLayerImpl(params);
    return Ptr<RegionLayer>(impl);
}
||||||
|
|
||||||
|
} // namespace dnn
|
||||||
|
} // namespace cv
|
@ -0,0 +1,140 @@ |
|||||||
|
/*M ///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "../precomp.hpp" |
||||||
|
#include <opencv2/dnn/shape_utils.hpp> |
||||||
|
#include <opencv2/dnn/all_layers.hpp> |
||||||
|
#include <iostream> |
||||||
|
|
||||||
|
namespace cv |
||||||
|
{ |
||||||
|
namespace dnn |
||||||
|
{ |
||||||
|
|
||||||
|
class ReorgLayerImpl : public ReorgLayer |
||||||
|
{ |
||||||
|
int reorgStride; |
||||||
|
public: |
||||||
|
|
||||||
|
ReorgLayerImpl(const LayerParams& params) |
||||||
|
{ |
||||||
|
setParamsFrom(params); |
||||||
|
|
||||||
|
reorgStride = params.get<int>("reorg_stride", 2); |
||||||
|
CV_Assert(reorgStride > 0); |
||||||
|
} |
||||||
|
|
||||||
|
bool getMemoryShapes(const std::vector<MatShape> &inputs, |
||||||
|
const int requiredOutputs, |
||||||
|
std::vector<MatShape> &outputs, |
||||||
|
std::vector<MatShape> &internals) const |
||||||
|
{ |
||||||
|
CV_Assert(inputs.size() > 0); |
||||||
|
outputs = std::vector<MatShape>(inputs.size(), shape( |
||||||
|
inputs[0][0], |
||||||
|
inputs[0][1] * reorgStride * reorgStride, |
||||||
|
inputs[0][2] / reorgStride, |
||||||
|
inputs[0][3] / reorgStride)); |
||||||
|
|
||||||
|
CV_Assert(outputs[0][0] > 0 && outputs[0][1] > 0 && outputs[0][2] > 0 && outputs[0][3] > 0); |
||||||
|
CV_Assert(total(outputs[0]) == total(inputs[0])); |
||||||
|
|
||||||
|
return false; |
||||||
|
} |
||||||
|
|
||||||
|
virtual bool supportBackend(int backendId) |
||||||
|
{ |
||||||
|
return backendId == DNN_BACKEND_DEFAULT; |
||||||
|
} |
||||||
|
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) |
||||||
|
{ |
||||||
|
CV_TRACE_FUNCTION(); |
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str()); |
||||||
|
|
||||||
|
for (size_t i = 0; i < inputs.size(); i++) |
||||||
|
{ |
||||||
|
Mat srcBlob = *inputs[i]; |
||||||
|
MatShape inputShape = shape(srcBlob), outShape = shape(outputs[i]); |
||||||
|
float *dstData = outputs[0].ptr<float>(); |
||||||
|
const float *srcData = srcBlob.ptr<float>(); |
||||||
|
|
||||||
|
int channels = inputShape[1], height = inputShape[2], width = inputShape[3]; |
||||||
|
|
||||||
|
int out_c = channels / (reorgStride*reorgStride); |
||||||
|
|
||||||
|
for (int k = 0; k < channels; ++k) { |
||||||
|
for (int j = 0; j < height; ++j) { |
||||||
|
for (int i = 0; i < width; ++i) { |
||||||
|
int out_index = i + width*(j + height*k); |
||||||
|
int c2 = k % out_c; |
||||||
|
int offset = k / out_c; |
||||||
|
int w2 = i*reorgStride + offset % reorgStride; |
||||||
|
int h2 = j*reorgStride + offset / reorgStride; |
||||||
|
int in_index = w2 + width*reorgStride*(h2 + height*reorgStride*c2); |
||||||
|
dstData[out_index] = srcData[in_index]; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
virtual int64 getFLOPS(const std::vector<MatShape> &inputs, |
||||||
|
const std::vector<MatShape> &outputs) const |
||||||
|
{ |
||||||
|
(void)outputs; // suppress unused variable warning
|
||||||
|
|
||||||
|
int64 flops = 0; |
||||||
|
for(int i = 0; i < inputs.size(); i++) |
||||||
|
{ |
||||||
|
flops += 21*total(inputs[i]); |
||||||
|
} |
||||||
|
return flops; |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
// Factory: builds a ReorgLayerImpl from params and hands it back as a
// Ptr<ReorgLayer>.
Ptr<ReorgLayer> ReorgLayer::create(const LayerParams& params)
{
    ReorgLayerImpl *impl = new ReorgLayerImpl(params);
    return Ptr<ReorgLayer>(impl);
}
||||||
|
|
||||||
|
} // namespace dnn
|
||||||
|
} // namespace cv
|
@ -0,0 +1,186 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
// (3-clause BSD License)
|
||||||
|
//
|
||||||
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * Neither the names of the copyright holders nor the names of the contributors
|
||||||
|
// may be used to endorse or promote products derived from this software
|
||||||
|
// without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall copyright holders or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "test_precomp.hpp" |
||||||
|
#include <opencv2/dnn/shape_utils.hpp> |
||||||
|
#include <algorithm> |
||||||
|
|
||||||
|
namespace cvtest |
||||||
|
{ |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::dnn; |
||||||
|
|
||||||
|
template<typename TString> |
||||||
|
static std::string _tf(TString filename) |
||||||
|
{ |
||||||
|
return (getOpenCVExtraDir() + "/dnn/") + filename; |
||||||
|
} |
||||||
|
|
||||||
|
// Reading tiny-yolo-voc.cfg alone (no weights) must yield a non-empty network.
TEST(Test_Darknet, read_tiny_yolo_voc)
{
    const std::string cfgPath = _tf("tiny-yolo-voc.cfg");
    Net net = readNetFromDarknet(cfgPath);
    ASSERT_FALSE(net.empty());
}
||||||
|
|
||||||
|
// Reading yolo-voc.cfg alone (no weights) must yield a non-empty network.
TEST(Test_Darknet, read_yolo_voc)
{
    const std::string cfgPath = _tf("yolo-voc.cfg");
    Net net = readNetFromDarknet(cfgPath);
    ASSERT_FALSE(net.empty());
}
||||||
|
|
||||||
|
// Runs tiny-yolo-voc on dog416.png and compares the detections whose best
// class score exceeds 0.24 with reference values.
TEST(Reproducibility_TinyYoloVoc, Accuracy)
{
    Net net;
    {
        const string cfg = findDataFile("dnn/tiny-yolo-voc.cfg", false);
        const string model = findDataFile("dnn/tiny-yolo-voc.weights", false);
        net = readNetFromDarknet(cfg, model);
        ASSERT_FALSE(net.empty());
    }

    // dog416.png is dog.jpg resized to 416x416 and stored as lossless PNG
    Mat sample = imread(_tf("dog416.png"));
    ASSERT_TRUE(!sample.empty());

    const Size inputSize(416, 416);
    if (sample.size() != inputSize)
        resize(sample, sample, inputSize);

    net.setInput(blobFromImage(sample, 1 / 255.F), "data");
    Mat out = net.forward("detection_out");

    // Keep every output row whose highest class score passes the threshold.
    const float confidenceThreshold = 0.24;
    const int probability_index = 5;   // class scores start at column 5
    Mat detection;
    for (int r = 0; r < out.rows; r++) {
        const int probability_size = out.cols - probability_index;
        float *scoresBegin = &out.at<float>(r, probability_index);
        float *scoresEnd = scoresBegin + probability_size;
        float *best = std::max_element(scoresBegin, scoresEnd);
        const size_t objectClass = best - scoresBegin;
        const float confidence = out.at<float>(r, (int)objectClass + probability_index);

        if (confidence > confidenceThreshold)
            detection.push_back(out.row(r));
    }

    // obtained by: ./darknet detector test ./cfg/voc.data ./cfg/tiny-yolo-voc.cfg ./tiny-yolo-voc.weights -thresh 0.24 ./dog416.png
    // There are 2 objects (6-car, 11-dog) with 25 values for each:
    // { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
    float ref_array[] = {
        0.736762F, 0.239551F, 0.315440F, 0.160779F, 0.761977F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.761967F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

        0.287486F, 0.653731F, 0.315579F, 0.534527F, 0.782737F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.780595F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
    };

    const int number_of_objects = 2;
    const int values_per_object = (int)(sizeof(ref_array) / sizeof(ref_array[0])) / number_of_objects;
    Mat ref(number_of_objects, values_per_object, CV_32FC1, ref_array);

    normAssert(ref, detection);
}
||||||
|
|
||||||
|
// Accuracy test for YOLO (VOC): runs the network on a fixed image and
// compares every row of "detection_out" whose best class probability exceeds
// the threshold with reference values obtained from the Darknet tool.
TEST(Reproducibility_YoloVoc, Accuracy)
{
    Net net;
    {
        const string cfg = findDataFile("dnn/yolo-voc.cfg", false);
        const string model = findDataFile("dnn/yolo-voc.weights", false);
        net = readNetFromDarknet(cfg, model);
        ASSERT_FALSE(net.empty());
    }

    // dog416.png is dog.jpg resized to 416x416 and saved in the lossless PNG format
    Mat sample = imread(_tf("dog416.png"));
    ASSERT_TRUE(!sample.empty());

    const Size inputSize(416, 416);
    if (sample.size() != inputSize)
        resize(sample, sample, inputSize);

    net.setInput(blobFromImage(sample, 1 / 255.F), "data");
    Mat out = net.forward("detection_out");

    // Each output row starts with 5 non-class values (see the layout note below),
    // followed by one probability per class.
    const int probability_index = 5;
    // Without class columns the [begin, end) range passed to max_element would be invalid.
    ASSERT_GT(out.cols, probability_index);
    const int probability_size = out.cols - probability_index;
    const float confidenceThreshold = 0.24F;

    // Keep only the rows whose highest class probability passes the threshold.
    Mat detection;
    for (int i = 0; i < out.rows; i++) {
        const float *prob_array_ptr = &out.at<float>(i, probability_index);
        const float confidence = *std::max_element(prob_array_ptr, prob_array_ptr + probability_size);

        if (confidence > confidenceThreshold)
            detection.push_back(out.row(i));
    }

    // obtained by: ./darknet detector test ./cfg/voc.data ./cfg/yolo-voc.cfg ./yolo-voc.weights -thresh 0.24 ./dog416.png
    // There are 3 objects (6-car, 1-bicycle, 11-dog) with 25 values for each:
    // { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
    float ref_array[] = {
        0.740161F, 0.214100F, 0.325575F, 0.173418F, 0.750769F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.750469F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

        0.501618F, 0.504757F, 0.461713F, 0.481310F, 0.783550F, 0.000000F, 0.780879F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,

        0.279968F, 0.638651F, 0.282737F, 0.600284F, 0.901864F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.901615F,
        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
    };

    const int number_of_objects = 3;
    const int values_per_object =
        static_cast<int>(sizeof(ref_array) / sizeof(ref_array[0])) / number_of_objects;
    // The Mat header wraps ref_array without copying it.
    Mat ref(number_of_objects, values_per_object, CV_32FC1, ref_array);

    // Report a wrong number of detections (or row length) as such before comparing values.
    ASSERT_EQ(ref.rows, detection.rows);
    ASSERT_EQ(ref.cols, detection.cols);
    normAssert(ref, detection);
}
||||||
|
|
||||||
|
} |
@ -0,0 +1,117 @@ |
|||||||
|
#include <opencv2/dnn.hpp> |
||||||
|
#include <opencv2/dnn/shape_utils.hpp> |
||||||
|
#include <opencv2/imgproc.hpp> |
||||||
|
#include <opencv2/highgui.hpp> |
||||||
|
using namespace cv; |
||||||
|
using namespace cv::dnn; |
||||||
|
|
||||||
|
#include <fstream> |
||||||
|
#include <iostream> |
||||||
|
#include <algorithm> |
||||||
|
#include <cstdlib> |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
// Size the input image is resized to before it is converted to the network input blob.
const size_t network_width = 416;
const size_t network_height = 416;

// Description printed before the usage message when "help" is requested.
// Adjacent literals are concatenated verbatim, so the separating space after
// the URL has to be part of the literal itself.
const char* about = "This sample uses You only look once (YOLO)-Detector "
                    "(https://arxiv.org/abs/1612.08242) "
                    "to detect objects on image\n"; // TODO: link

// Command line keys handed to cv::CommandLineParser,
// one "{ name | default value | description }" entry per option.
const char* params
    = "{ help | false | print usage }"
      "{ cfg | | model configuration }"
      "{ model | | model weights }"
      "{ image | | image for detection }"
      "{ min_confidence | 0.24 | min confidence }";
||||||
|
|
||||||
|
int main(int argc, char** argv) |
||||||
|
{ |
||||||
|
cv::CommandLineParser parser(argc, argv, params); |
||||||
|
|
||||||
|
if (parser.get<bool>("help")) |
||||||
|
{ |
||||||
|
std::cout << about << std::endl; |
||||||
|
parser.printMessage(); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
String modelConfiguration = parser.get<string>("cfg"); |
||||||
|
String modelBinary = parser.get<string>("model"); |
||||||
|
|
||||||
|
//! [Initialize network]
|
||||||
|
dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary); |
||||||
|
//! [Initialize network]
|
||||||
|
|
||||||
|
if (net.empty()) |
||||||
|
{ |
||||||
|
cerr << "Can't load network by using the following files: " << endl; |
||||||
|
cerr << "cfg-file: " << modelConfiguration << endl; |
||||||
|
cerr << "weights-file: " << modelBinary << endl; |
||||||
|
cerr << "Models can be downloaded here:" << endl; |
||||||
|
cerr << "https://pjreddie.com/darknet/yolo/" << endl; |
||||||
|
exit(-1); |
||||||
|
} |
||||||
|
|
||||||
|
cv::Mat frame = cv::imread(parser.get<string>("image")); |
||||||
|
|
||||||
|
//! [Resizing without keeping aspect ratio]
|
||||||
|
cv::Mat resized; |
||||||
|
cv::resize(frame, resized, cv::Size(network_width, network_height)); |
||||||
|
//! [Resizing without keeping aspect ratio]
|
||||||
|
|
||||||
|
//! [Prepare blob]
|
||||||
|
Mat inputBlob = blobFromImage(resized, 1 / 255.F); //Convert Mat to batch of images
|
||||||
|
//! [Prepare blob]
|
||||||
|
|
||||||
|
//! [Set input blob]
|
||||||
|
net.setInput(inputBlob, "data"); //set the network input
|
||||||
|
//! [Set input blob]
|
||||||
|
|
||||||
|
//! [Make forward pass]
|
||||||
|
cv::Mat detectionMat = net.forward("detection_out"); //compute output
|
||||||
|
//! [Make forward pass]
|
||||||
|
|
||||||
|
|
||||||
|
float confidenceThreshold = parser.get<float>("min_confidence"); |
||||||
|
for (int i = 0; i < detectionMat.rows; i++) |
||||||
|
{ |
||||||
|
const int probability_index = 5; |
||||||
|
const int probability_size = detectionMat.cols - probability_index; |
||||||
|
float *prob_array_ptr = &detectionMat.at<float>(i, probability_index); |
||||||
|
|
||||||
|
size_t objectClass = std::max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr; |
||||||
|
float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index); |
||||||
|
|
||||||
|
if (confidence > confidenceThreshold) |
||||||
|
{ |
||||||
|
float x = detectionMat.at<float>(i, 0); |
||||||
|
float y = detectionMat.at<float>(i, 1); |
||||||
|
float width = detectionMat.at<float>(i, 2); |
||||||
|
float height = detectionMat.at<float>(i, 3); |
||||||
|
float xLeftBottom = (x - width / 2) * frame.cols; |
||||||
|
float yLeftBottom = (y - height / 2) * frame.rows; |
||||||
|
float xRightTop = (x + width / 2) * frame.cols; |
||||||
|
float yRightTop = (y + height / 2) * frame.rows; |
||||||
|
|
||||||
|
std::cout << "Class: " << objectClass << std::endl; |
||||||
|
std::cout << "Confidence: " << confidence << std::endl; |
||||||
|
|
||||||
|
std::cout << " " << xLeftBottom |
||||||
|
<< " " << yLeftBottom |
||||||
|
<< " " << xRightTop |
||||||
|
<< " " << yRightTop << std::endl; |
||||||
|
|
||||||
|
Rect object((int)xLeftBottom, (int)yLeftBottom, |
||||||
|
(int)(xRightTop - xLeftBottom), |
||||||
|
(int)(yRightTop - yLeftBottom)); |
||||||
|
|
||||||
|
rectangle(frame, object, Scalar(0, 255, 0)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
imshow("detections", frame); |
||||||
|
waitKey(); |
||||||
|
|
||||||
|
return 0; |
||||||
|
} // main
|
Loading…
Reference in new issue