Merge pull request #750 from ludv1x:dnn-python-bindings
commit
b1346e5ab6
50 changed files with 56934 additions and 695 deletions
@ -0,0 +1,31 @@ |
||||
# Downloads one pretrained Caffe model into $ENV{OPENCV_TEST_DATA_PATH}/dnn.
#
# The model is selected with the `model` variable (GoogleNet, VGG16 or
# voc-fcn32s); GoogleNet is used when `model` is not set. For every model
# <name> the script looks up <name>_url, <name>_dst and, optionally,
# <name>_sha (expected SHA1 of the downloaded file).

set(GoogleNet_url "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel")
set(GoogleNet_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/bvlc_googlenet.caffemodel")
set(GoogleNet_sha "405fc5acd08a3bb12de8ee5e23a96bec22f08204")

set(VGG16_url "http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel")
# Must be named VGG16_dst so that ${${model}_dst} resolves for model=VGG16.
set(VGG16_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/VGG_ILSVRC_16_layers.caffemodel")

set(voc-fcn32s_url "http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel")
set(voc-fcn32s_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/fcn32s-heavy-pascal.caffemodel")

if(NOT model)
    set(model "GoogleNet")
endif()

message(STATUS "Downloading ${${model}_url} to ${${model}_dst}")

# An already existing destination file is left untouched.
if(NOT EXISTS "${${model}_dst}")
    # Test whether the <model>_sha variable itself is defined. Expanding it
    # inside if() would make CMake treat the hash text as a variable name,
    # which is never defined, so the hash would silently never be checked.
    if(DEFINED ${model}_sha)
        file(DOWNLOAD "${${model}_url}" "${${model}_dst}" SHOW_PROGRESS EXPECTED_HASH SHA1=${${model}_sha} STATUS status_vec)
    else()
        file(DOWNLOAD "${${model}_url}" "${${model}_dst}" SHOW_PROGRESS STATUS status_vec)
    endif()

    # status_vec is a two element list: numeric code (0 = success) and message.
    list(GET status_vec 0 status)
    list(GET status_vec 1 status_msg)
    if(status EQUAL 0)
        message(STATUS "Ok! ${status_msg}")
    else()
        message(STATUS "Fail! ${status_msg}")
    endif()
endif()
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,108 @@ |
||||
#ifdef HAVE_OPENCV_DNN |
||||
typedef dnn::DictValue LayerId; |
||||
typedef std::vector<cv::dnn::Blob> vector_Blob; |
||||
|
||||
template<> |
||||
bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name); |
||||
|
||||
template<> struct pyopencvVecConverter<dnn::Blob> |
||||
{ |
||||
static bool to(PyObject* obj, std::vector<dnn::Blob>& value, const ArgInfo info) |
||||
{ |
||||
if (PyArray_Check(obj)) |
||||
{ |
||||
value.resize(1); |
||||
return pyopencv_to(obj, value[0], info.name); |
||||
} |
||||
|
||||
return pyopencv_to_generic_vec(obj, value, info); |
||||
} |
||||
|
||||
static PyObject* from(const std::vector<dnn::Blob>& value) |
||||
{ |
||||
return pyopencv_from_generic_vec(value); |
||||
} |
||||
}; |
||||
|
||||
template<> |
||||
bool pyopencv_to(PyObject *o, std::vector<dnn::Blob> &blobs, const char *name) //required for Layer::blobs RW
|
||||
{ |
||||
return pyopencvVecConverter<dnn::Blob>::to(o, blobs, ArgInfo(name, false)); |
||||
} |
||||
|
||||
template<> |
||||
bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name) |
||||
{ |
||||
Mat &dst = blob.matRef(); |
||||
if (!pyopencv_to(o, dst, name)) |
||||
return false; |
||||
|
||||
if (PyArray_Check(o)) //try fix channels
|
||||
{ |
||||
PyArrayObject* oarr = (PyArrayObject*) o; |
||||
|
||||
if (PyArray_NDIM(oarr) == dst.dims) |
||||
return true; |
||||
|
||||
int ndims = PyArray_NDIM(oarr); |
||||
std::vector<int> shape(ndims); |
||||
const npy_intp* _sizes = PyArray_DIMS(oarr); |
||||
for (int i = 0; i < ndims; i++) |
||||
shape[i] = (int)_sizes[i]; |
||||
|
||||
dst = dst.reshape(1, ndims, &shape[0]); |
||||
} |
||||
|
||||
return true; |
||||
} |
||||
|
||||
template<> |
||||
PyObject *pyopencv_from(const dnn::Blob &blob) |
||||
{ |
||||
return pyopencv_from(blob.matRefConst()); |
||||
} |
||||
|
||||
template<> |
||||
bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name) |
||||
{ |
||||
(void)name; |
||||
if (!o || o == Py_None) |
||||
return true; //Current state will be used
|
||||
else if (PyLong_Check(o)) |
||||
{ |
||||
dv = dnn::DictValue((int64)PyLong_AsLongLong(o)); |
||||
return true; |
||||
} |
||||
else if (PyFloat_Check(o)) |
||||
{ |
||||
dv = dnn::DictValue(PyFloat_AS_DOUBLE(o)); |
||||
return true; |
||||
} |
||||
else if (PyString_Check(o)) |
||||
{ |
||||
dv = dnn::DictValue(String(PyString_AsString(o))); |
||||
return true; |
||||
} |
||||
else |
||||
return false; |
||||
} |
||||
|
||||
template<> |
||||
bool pyopencv_to(PyObject *o, dnn::BlobShape &shape, const char *name) |
||||
{ |
||||
std::vector<int> data; |
||||
if (!pyopencv_to_generic_vec(o, data, ArgInfo(name, false))) |
||||
return false; |
||||
|
||||
shape = data.size() ? dnn::BlobShape((int)data.size(), &data[0]) : dnn::BlobShape::empty(); |
||||
return true; |
||||
} |
||||
|
||||
template<> |
||||
PyObject *pyopencv_from(const dnn::BlobShape &shape) |
||||
{ |
||||
std::vector<int> data(shape.ptr(), shape.ptr() + shape.dims()); |
||||
return pyopencv_from_generic_vec(data); |
||||
} |
||||
|
||||
#endif |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,34 @@ |
||||
from __future__ import print_function |
||||
import numpy as np |
||||
import cv2 |
||||
from cv2 import dnn |
||||
import timeit |
||||
|
||||
def prepare_image(img):
    """Resize `img` to 224x224 and return it as a float32 array of shape
    (-1, 3, 224, 224), i.e. with the channel axis moved in front of the
    spatial axes and a leading batch axis added."""
    resized = cv2.resize(img, (224, 224))
    # interleaved channels (last axis) -> planar layout (channel axis first)
    planar = np.moveaxis(resized, 2, 0).astype(np.float32)
    return np.reshape(planar, (-1, 3, 224, 224))
||||
|
||||
def timeit_forward(net):
    """Print whether OpenCL is enabled and the total time that
    timeit reports for 10 calls of net.forward()."""
    opencl_enabled = cv2.ocl.useOpenCL()
    print("OpenCL:", opencl_enabled)
    elapsed = timeit.timeit(net.forward, number=10)
    print("Runtime:", elapsed)
||||
|
||||
def get_class_list():
    """Read 'synset_words.txt' and return one class name per line.

    The name is everything after the first space of the line (the whole line
    when it contains no space). The line terminator is stripped so the names
    can be printed or compared without a trailing newline.
    """
    with open('synset_words.txt', 'rt') as f:
        return [line[line.find(" ") + 1:].rstrip("\r\n") for line in f]
||||
|
||||
# Classify 'space_shuttle.jpg' with GoogLeNet and print the best matching class.
image = cv2.imread('space_shuttle.jpg')
if image is None:
    # cv2.imread reports an unreadable or missing file by returning None;
    # stop here with a clear message instead of failing inside cv2.resize.
    raise SystemExit("Can't read image 'space_shuttle.jpg'")

blob = prepare_image(image)
print("Input:", blob.shape, blob.dtype)

cv2.ocl.setUseOpenCL(True)  #Disable OCL if you want
net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
net.setBlob(".data", blob)
net.forward()
#timeit_forward(net)        #Uncomment to check performance

prob = net.getBlob("prob")
print("Output:", prob.shape, prob.dtype)
classes = get_class_list()
# argmax over the flattened output is used directly as the class index
print("Best match", classes[prob.argmax()])
@ -0,0 +1,153 @@ |
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
#include <fstream> |
||||
#include <iostream> |
||||
#include <cstdlib> |
||||
using namespace std; |
||||
|
||||
const size_t width = 300; |
||||
const size_t height = 300; |
||||
|
||||
// Builds a 3-channel CV_32F image of imageHeight rows and imageWidth columns
// in which every pixel holds the per-channel mean values (104, 117, 123).
Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
{
    const Scalar meanValues(104, 117, 123);
    return Mat(static_cast<int>(imageHeight), static_cast<int>(imageWidth),
               CV_32FC3, meanValues);
}
||||
|
||||
// Prepares a frame for the network: converts it to 32-bit float, resizes it
// to width x height and subtracts the per-channel mean image.
// The frame is expected to have 3 channels, matching the mean image.
Mat preprocess(const Mat& frame)
{
    Mat preprocessed;
    frame.convertTo(preprocessed, CV_32FC3);
    resize(preprocessed, preprocessed,
           Size(static_cast<int>(width), static_cast<int>(height))); //SSD accepts 300x300 RGB-images

    // getMean takes (height, width); the arguments used to be passed swapped,
    // which only worked because both constants are equal.
    Mat mean = getMean(height, width);
    cv::subtract(preprocessed, mean, preprocessed);

    return preprocessed;
}
||||
|
||||
// Text printed before the usage message when help is requested.
// Adjacent literals are concatenated, so each piece must carry its own
// separating space.
const char* about = "This sample uses Single-Shot Detector "
                    "(https://arxiv.org/abs/1512.02325) "
                    "to detect objects on image\n"; // TODO: link

// Command line keys in cv::CommandLineParser format: { name | default | help }
const char* params
    = "{ help | false | print usage }"
      "{ proto | | model configuration }"
      "{ model | | model weights }"
      "{ image | | image for detection }"
      "{ min_confidence | 0.5 | min confidence }";
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
cv::CommandLineParser parser(argc, argv, params); |
||||
|
||||
if (parser.get<bool>("help")) |
||||
{ |
||||
std::cout << about << std::endl; |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
cv::dnn::initModule(); //Required if OpenCV is built as static libs
|
||||
|
||||
String modelConfiguration = parser.get<string>("proto"); |
||||
String modelBinary = parser.get<string>("model"); |
||||
|
||||
//! [Create the importer of Caffe model]
|
||||
Ptr<dnn::Importer> importer; |
||||
|
||||
// Import Caffe SSD model
|
||||
try |
||||
{ |
||||
importer = dnn::createCaffeImporter(modelConfiguration, modelBinary); |
||||
} |
||||
catch (const cv::Exception &err) //Importer can throw errors, we will catch them
|
||||
{ |
||||
cerr << err.msg << endl; |
||||
} |
||||
//! [Create the importer of Caffe model]
|
||||
|
||||
if (!importer) |
||||
{ |
||||
cerr << "Can't load network by using the following files: " << endl; |
||||
cerr << "prototxt: " << modelConfiguration << endl; |
||||
cerr << "caffemodel: " << modelBinary << endl; |
||||
cerr << "Models can be downloaded here:" << endl; |
||||
cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
//! [Initialize network]
|
||||
dnn::Net net; |
||||
importer->populateNet(net); |
||||
importer.release(); //We don't need importer anymore
|
||||
//! [Initialize network]
|
||||
|
||||
cv::Mat frame = cv::imread(parser.get<string>("image"), -1); |
||||
|
||||
//! [Prepare blob]
|
||||
Mat preprocessedFrame = preprocess(frame); |
||||
|
||||
dnn::Blob inputBlob = dnn::Blob::fromImages(preprocessedFrame); //Convert Mat to dnn::Blob image
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setBlob(".data", inputBlob); //set the network input
|
||||
//! [Set input blob]
|
||||
|
||||
//! [Make forward pass]
|
||||
net.forward(); //compute output
|
||||
//! [Make forward pass]
|
||||
|
||||
//! [Gather output]
|
||||
dnn::Blob detection = net.getBlob("detection_out"); |
||||
Mat detectionMat(detection.rows(), detection.cols(), CV_32F, detection.ptrf()); |
||||
|
||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
||||
for(int i = 0; i < detectionMat.rows; i++) |
||||
{ |
||||
float confidence = detectionMat.at<float>(i, 2); |
||||
|
||||
if(confidence > confidenceThreshold) |
||||
{ |
||||
size_t objectClass = detectionMat.at<float>(i, 1); |
||||
|
||||
float xLeftBottom = detectionMat.at<float>(i, 3) * frame.cols; |
||||
float yLeftBottom = detectionMat.at<float>(i, 4) * frame.rows; |
||||
float xRightTop = detectionMat.at<float>(i, 5) * frame.cols; |
||||
float yRightTop = detectionMat.at<float>(i, 6) * frame.rows; |
||||
|
||||
std::cout << "Class: " << objectClass << std::endl; |
||||
std::cout << "Confidence: " << confidence << std::endl; |
||||
|
||||
std::cout << " " << xLeftBottom |
||||
<< " " << yLeftBottom |
||||
<< " " << xRightTop |
||||
<< " " << yRightTop << std::endl; |
||||
|
||||
Rect object(xLeftBottom, yLeftBottom, |
||||
xRightTop - xLeftBottom, |
||||
yRightTop - yLeftBottom); |
||||
|
||||
rectangle(frame, object, Scalar(0, 255, 0)); |
||||
} |
||||
} |
||||
|
||||
imshow("detections", frame); |
||||
waitKey(); |
||||
|
||||
return 0; |
||||
} // main
|
@ -1,79 +0,0 @@ |
||||
#!/usr/bin/env python |
||||
import os |
||||
import sys |
||||
import time |
||||
import urllib |
||||
import hashlib |
||||
import argparse |
||||
import json |
||||
|
||||
|
||||
def reporthook(count, block_size, total_size):
    """
    Progress callback for urlretrieve.

    From http://blog.moleculea.com/2012/10/04/urlretrieve-progres-indicator/

    The first call (count == 0) only records the start time. Later calls
    rewrite one status line on stdout, at most once per elapsed second.
    The percentage is shown only when total_size is positive (a size that is
    zero or negative is treated as unknown) and is capped at 100, because
    count * block_size can overshoot the real size on the last block.
    """
    global start_time
    global prev_duration
    if count == 0:
        start_time = time.time()
        prev_duration = -1
        return
    # never report less than one second, which also avoids dividing by zero
    duration = max(1, time.time() - start_time)
    if int(duration) == int(prev_duration):
        return

    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    size_mb = progress_size / (1024 * 1024)
    if total_size > 0:
        percent = min(100, int(progress_size * 100 / total_size))
        sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
                         (percent, size_mb, speed, duration))
    else:
        sys.stdout.write("\r...%d MB, %d KB/s, %d seconds passed" %
                         (size_mb, speed, duration))
    sys.stdout.flush()
    prev_duration = duration
||||
|
||||
|
||||
# Function for checking SHA1. |
||||
def model_checks_out(filename, sha1):
    """Return True when the SHA1 hex digest of the file equals `sha1`.

    The file is opened in binary mode, so the digest is computed over the raw
    bytes, and it is read in 1 MiB chunks so large model files are not loaded
    into memory at once.
    """
    digest = hashlib.sha1()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest() == sha1
||||
|
||||
def model_download(filename, url, sha1):
    """Download `url` to `filename` unless a file with the expected SHA1
    already exists there. Exits the process with status 1 when the
    downloaded file does not match `sha1`."""
    # Check if model exists.
    if os.path.exists(filename) and model_checks_out(filename, sha1):
        print("Model {} already exists.".format(filename))
        return

    # Download and verify model. The file is hashed once; the former debug
    # print hashed it a second time and printed a bare True/False.
    urllib.urlretrieve(url, filename, reporthook)
    if not model_checks_out(filename, sha1):
        print("ERROR: model {} did not download correctly!".format(url))
        sys.exit(1)
||||
|
||||
if __name__ == '__main__':
    # Reads a JSON file that maps model names to {"file", "url", "sha1"}
    # and downloads every listed model below OPENCV_TEST_DATA_PATH.
    parser = argparse.ArgumentParser(description="Downloading trained model binaries.")
    parser.add_argument("download_list")
    args = parser.parse_args()

    test_dir = os.environ.get("OPENCV_TEST_DATA_PATH")
    if not test_dir:
        print("ERROR: OPENCV_TEST_DATA_PATH environment not specified")
        sys.exit(1)

    try:
        with open(args.download_list, 'r') as f:
            models_to_download = json.load(f)
    except (IOError, ValueError):
        # IOError: the list cannot be opened; ValueError: it is not valid JSON.
        print("ERROR: Can't parse {}".format(args.download_list))
        sys.exit(1)

    for model_name, model in models_to_download.items():
        # The target directory must already exist; it is not created here.
        dst_dir = os.path.join(test_dir, os.path.dirname(model['file']))
        dst_file = os.path.join(test_dir, model['file'])
        if not os.path.exists(dst_dir):
            print("ERROR: Can't find module testdata path '{}'".format(dst_dir))
            sys.exit(1)

        print("Downloading model '{}' to {} from {} ...".format(model_name, dst_file, model['url']))
        model_download(dst_file, model['url'], model['sha1'])
@ -1,7 +0,0 @@ |
||||
{ |
||||
"googlenet": { |
||||
"file": "dnn/bvlc_googlenet.caffemodel", |
||||
"url": "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel", |
||||
"sha1": "405fc5acd08a3bb12de8ee5e23a96bec22f08204" |
||||
} |
||||
} |
Binary file not shown.
@ -0,0 +1,750 @@ |
||||
/*M ///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp" |
||||
#include "layers_common.hpp" |
||||
#include "detection_output_layer.hpp" |
||||
#include <float.h> |
||||
#include <string> |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
|
||||
namespace util |
||||
{ |
||||
// Formats any streamable value as text using operator<< on a string stream.
template <typename T>
std::string to_string(T value)
{
    std::ostringstream out;
    out << value;
    const std::string text = out.str();
    return text;
}
||||
|
||||
template <typename T> |
||||
void make_error(const std::string& message1, const T& message2) |
||||
{ |
||||
std::string error(message1); |
||||
error += std::string(util::to_string<int>(message2)); |
||||
CV_Error(Error::StsBadArg, error.c_str()); |
||||
} |
||||
|
||||
// Ordering predicate for (score, payload) pairs: true when pair1 has the
// strictly higher score, so sorting with it puts the best scores first.
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
                          const std::pair<float, T>& pair2)
{
    const float leftScore = pair1.first;
    const float rightScore = pair2.first;
    return rightScore < leftScore;
}
||||
} |
||||
|
||||
const std::string DetectionOutputLayer::_layerName = std::string("DetectionOutput"); |
||||
|
||||
bool DetectionOutputLayer::getParameterDict(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
DictValue& result) |
||||
{ |
||||
if (!params.has(parameterName)) |
||||
{ |
||||
return false; |
||||
} |
||||
|
||||
result = params.get(parameterName); |
||||
return true; |
||||
} |
||||
|
||||
template<typename T> |
||||
T DetectionOutputLayer::getParameter(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
const size_t &idx, |
||||
const bool required, |
||||
const T& defaultValue) |
||||
{ |
||||
DictValue dictValue; |
||||
bool success = getParameterDict(params, parameterName, dictValue); |
||||
if(!success) |
||||
{ |
||||
if(required) |
||||
{ |
||||
std::string message = _layerName; |
||||
message += " layer parameter does not contain "; |
||||
message += parameterName; |
||||
message += " parameter."; |
||||
CV_Error(Error::StsBadArg, message); |
||||
} |
||||
else |
||||
{ |
||||
return defaultValue; |
||||
} |
||||
} |
||||
return dictValue.get<T>(idx); |
||||
} |
||||
|
||||
void DetectionOutputLayer::getCodeType(LayerParams ¶ms) |
||||
{ |
||||
String codeTypeString = params.get<String>("code_type").toLowerCase(); |
||||
if (codeTypeString == "corner") |
||||
_codeType = caffe::PriorBoxParameter_CodeType_CORNER; |
||||
else if (codeTypeString == "center_size") |
||||
_codeType = caffe::PriorBoxParameter_CodeType_CENTER_SIZE; |
||||
else |
||||
_codeType = caffe::PriorBoxParameter_CodeType_CORNER; |
||||
} |
||||
|
||||
// Reads the layer configuration from params.
// Parameters fetched without an explicit default are looked up with
// getParameter's default arguments; the others fall back to the value given.
DetectionOutputLayer::DetectionOutputLayer(LayerParams &params) : Layer(params)
{
    _numClasses = getParameter<unsigned>(params, "num_classes");
    _shareLocation = getParameter<bool>(params, "share_location");

    // With shared locations one set of boxes serves every class.
    if (_shareLocation)
    {
        _numLocClasses = 1;
    }
    else
    {
        _numLocClasses = _numClasses;
    }

    _backgroundLabelId = getParameter<int>(params, "background_label_id");
    _varianceEncodedInTarget = getParameter<bool>(params, "variance_encoded_in_target", 0, false, false);
    _keepTopK = getParameter<int>(params, "keep_top_k");
    _confidenceThreshold = getParameter<float>(params, "confidence_threshold", 0, false, -FLT_MAX);
    _topK = getParameter<int>(params, "top_k", 0, false, -1);

    getCodeType(params);

    // Parameters used in nms.
    _nmsThreshold = getParameter<float>(params, "nms_threshold");
    CV_Assert(_nmsThreshold > 0.);
}
||||
|
||||
// Asserts that every input after the first has the same extent as inputs[0]
// along each of the first _numAxes axes. Nothing is checked when fewer than
// two inputs are given.
void DetectionOutputLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    const size_t inputCount = inputs.size();
    size_t i = 1;
    while (i < inputCount)
    {
        for (size_t j = 0; j < _numAxes; ++j)
        {
            CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);
        }
        ++i;
    }
}
||||
|
||||
// Validates the input blobs, derives _num and _numPriors from them and
// creates a placeholder output blob.
//
// inputs[0], inputs[1] and inputs[2] are all dereferenced here (forward()
// reads them as location, confidence and prior data), so at least three
// inputs are required.
void DetectionOutputLayer::allocate(const std::vector<Blob*> &inputs,
                                    std::vector<Blob> &outputs)
{
    // The former check (size() > 0) let inputs[1] / inputs[2] be read out of
    // bounds when fewer than three blobs were connected.
    CV_Assert(inputs.size() >= 3);
    CV_Assert(inputs[0]->num() == inputs[1]->num());
    _num = inputs[0]->num();

    _numPriors = inputs[2]->rows() / 4;
    CV_Assert((_numPriors * _numLocClasses * 4) == inputs[0]->channels());
    CV_Assert(int(_numPriors * _numClasses) == inputs[1]->channels());

    // num() and channels() are 1.
    // Since the number of bboxes to be kept is unknown before nms, we manually
    // set it to (fake) 1.
    // Each row is a 7 dimension std::vector, which stores
    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
    outputs[0].create(BlobShape(1, 1, 1, 7));
}
||||
|
||||
// Runs the detection output stage.
//
// Steps: read location predictions, confidences and prior boxes from
// inputs[0..2]; decode the predictions into boxes; apply ApplyNMSFast per
// image and class (background class skipped); optionally keep only the
// _keepTopK best scoring detections per image; write the result to
// outputs[0] as a 1 x 1 x numKept x 7 blob whose rows are
// [image_id, label, confidence, xmin, ymin, xmax, ymax] with clipped boxes.
//
// Raises an error when predictions for a label are missing or when no
// detection at all is kept.
void DetectionOutputLayer::forward(std::vector<Blob*> &inputs,
                                   std::vector<Blob> &outputs)
{
    typedef std::map<int, std::vector<float> > ScoreMap;   // label -> scores
    typedef std::map<int, std::vector<int> > IndexMap;     // label -> kept indices

    const float* locationData = inputs[0]->ptrf();
    const float* confidenceData = inputs[1]->ptrf();
    const float* priorData = inputs[2]->ptrf();

    // Retrieve all location predictions.
    std::vector<LabelBBox> allLocationPredictions;
    GetLocPredictions(locationData, _num, _numPriors, _numLocClasses,
                      _shareLocation, &allLocationPredictions);

    // Retrieve all confidences.
    std::vector<ScoreMap> allConfidenceScores;
    GetConfidenceScores(confidenceData, _num, _numPriors, _numClasses,
                        &allConfidenceScores);

    // Retrieve all prior bboxes. It is same within a batch since we assume all
    // images in a batch are of same dimension.
    std::vector<caffe::NormalizedBBox> priorBBoxes;
    std::vector<std::vector<float> > priorVariances;
    GetPriorBBoxes(priorData, _numPriors, &priorBBoxes, &priorVariances);

    // Decode all loc predictions to bboxes.
    std::vector<LabelBBox> allDecodedBBoxes;
    DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, _num,
                    _shareLocation, _numLocClasses, _backgroundLabelId,
                    _codeType, _varianceEncodedInTarget, &allDecodedBBoxes);

    int numKept = 0;
    std::vector<IndexMap> allIndices;
    for (int i = 0; i < _num; ++i)
    {
        const LabelBBox& decodeBBoxes = allDecodedBBoxes[i];
        const ScoreMap& confidenceScores = allConfidenceScores[i];
        IndexMap indices;
        int numDetections = 0;
        for (int c = 0; c < (int)_numClasses; ++c)
        {
            if (c == _backgroundLabelId)
            {
                // Ignore background class.
                continue;
            }

            // make_error raises an error, so execution does not continue
            // past it with an end() iterator.
            const ScoreMap::const_iterator scoresIt = confidenceScores.find(c);
            if (scoresIt == confidenceScores.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find confidence predictions for label ", c);
            }

            // Shared locations are stored under the pseudo label -1.
            const int label = _shareLocation ? -1 : c;
            const LabelBBox::const_iterator bboxesIt = decodeBBoxes.find(label);
            if (bboxesIt == decodeBBoxes.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find location predictions for label ", label);
            }

            ApplyNMSFast(bboxesIt->second, scoresIt->second, _confidenceThreshold, _nmsThreshold,
                         _topK, &(indices[c]));
            numDetections += static_cast<int>(indices[c].size());
        }

        if (_keepTopK > -1 && numDetections > _keepTopK)
        {
            // Collect (score, (label, index)) for every surviving detection.
            std::vector<std::pair<float, std::pair<int, int> > > scoreIndexPairs;
            for (IndexMap::const_iterator it = indices.begin();
                 it != indices.end(); ++it)
            {
                const int label = it->first;
                const std::vector<int>& labelIndices = it->second;
                const ScoreMap::const_iterator scoresIt = confidenceScores.find(label);
                if (scoresIt == confidenceScores.end())
                {
                    // Something bad happened for current label.
                    // (This lookup is in the confidence scores; the message
                    // used to name the location predictions by mistake.)
                    util::make_error<int>("Could not find confidence predictions for label ", label);
                }
                const std::vector<float>& scores = scoresIt->second;
                for (size_t j = 0; j < labelIndices.size(); ++j)
                {
                    size_t idx = labelIndices[j];
                    CV_Assert(idx < scores.size());
                    scoreIndexPairs.push_back(
                        std::make_pair(scores[idx],
                                       std::make_pair(label, static_cast<int>(idx))));
                }
            }

            // Keep outputs k results per image.
            std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(),
                      util::SortScorePairDescend<std::pair<int, int> >);
            scoreIndexPairs.resize(_keepTopK);

            // Store the new indices.
            IndexMap newIndices;
            for (size_t j = 0; j < scoreIndexPairs.size(); ++j)
            {
                const int label = scoreIndexPairs[j].second.first;
                const int idx = scoreIndexPairs[j].second.second;
                newIndices[label].push_back(idx);
            }
            allIndices.push_back(newIndices);
            numKept += _keepTopK;
        }
        else
        {
            allIndices.push_back(indices);
            numKept += numDetections;
        }
    }

    if (numKept == 0)
    {
        CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections");
    }

    // Output shape: 1 x 1 x numKept x 7.
    std::vector<int> outputsShape(2, 1);
    outputsShape.push_back(numKept);
    outputsShape.push_back(7);
    outputs[0].create(outputsShape);
    float* outputsData = outputs[0].ptrf();

    int count = 0;
    for (int i = 0; i < _num; ++i)
    {
        const ScoreMap& confidenceScores = allConfidenceScores[i];
        const LabelBBox& decodeBBoxes = allDecodedBBoxes[i];
        const IndexMap& imageIndices = allIndices[i];
        for (IndexMap::const_iterator it = imageIndices.begin();
             it != imageIndices.end(); ++it)
        {
            const int label = it->first;
            const ScoreMap::const_iterator scoresIt = confidenceScores.find(label);
            if (scoresIt == confidenceScores.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find confidence predictions for label ", label);
            }
            const std::vector<float>& scores = scoresIt->second;

            const int locLabel = _shareLocation ? -1 : label;
            const LabelBBox::const_iterator bboxesIt = decodeBBoxes.find(locLabel);
            if (bboxesIt == decodeBBoxes.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find location predictions for label ", locLabel);
            }
            const std::vector<caffe::NormalizedBBox>& bboxes = bboxesIt->second;
            const std::vector<int>& indices = it->second;

            for (size_t j = 0; j < indices.size(); ++j)
            {
                const int idx = indices[j];
                float* row = outputsData + count * 7;
                row[0] = (float)i;
                row[1] = (float)label;
                row[2] = scores[idx];

                caffe::NormalizedBBox clipBBox;
                ClipBBox(bboxes[idx], &clipBBox);
                row[3] = clipBBox.xmin();
                row[4] = clipBBox.ymin();
                row[5] = clipBBox.xmax();
                row[6] = clipBBox.ymax();

                ++count;
            }
        }
    }
}
||||
|
||||
// Returns the area of bbox.
// - 0 for an invalid box (xmax < xmin or ymax < ymin);
// - the stored size when the box already has one;
// - otherwise width * height for normalized boxes, or
//   (width + 1) * (height + 1) when `normalized` is false.
float DetectionOutputLayer::BBoxSize(const caffe::NormalizedBBox& bbox,
                                     const bool normalized)
{
    const bool invalid = bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin();
    if (invalid)
        return 0;

    if (bbox.has_size())
        return bbox.size();

    const float boxWidth = bbox.xmax() - bbox.xmin();
    const float boxHeight = bbox.ymax() - bbox.ymin();
    if (!normalized)
    {
        // If bbox is not within range [0, 1].
        return (boxWidth + 1) * (boxHeight + 1);
    }
    return boxWidth * boxHeight;
}
||||
|
||||
// Writes to clipBBox a copy of bbox whose four coordinates are clamped to
// [0, 1]. The size is recomputed from the clamped coordinates and the
// `difficult` flag is carried over.
void DetectionOutputLayer::ClipBBox(const caffe::NormalizedBBox& bbox,
                                    caffe::NormalizedBBox* clipBBox)
{
    const float lower = 0.f;
    const float upper = 1.f;

    const float clippedXmin = std::max(std::min(bbox.xmin(), upper), lower);
    const float clippedYmin = std::max(std::min(bbox.ymin(), upper), lower);
    const float clippedXmax = std::max(std::min(bbox.xmax(), upper), lower);
    const float clippedYmax = std::max(std::min(bbox.ymax(), upper), lower);

    clipBBox->set_xmin(clippedXmin);
    clipBBox->set_ymin(clippedYmin);
    clipBBox->set_xmax(clippedXmax);
    clipBBox->set_ymax(clippedYmax);

    // Drop any stale size first so BBoxSize recomputes it from the coordinates.
    clipBBox->clear_size();
    clipBBox->set_size(BBoxSize(*clipBBox));
    clipBBox->set_difficult(bbox.difficult());
}
||||
|
||||
void DetectionOutputLayer::DecodeBBox( |
||||
const caffe::NormalizedBBox& priorBBox, const std::vector<float>& priorVariance, |
||||
const CodeType codeType, const bool varianceEncodedInTarget, |
||||
const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* decodeBBox) |
||||
{ |
||||
if (codeType == caffe::PriorBoxParameter_CodeType_CORNER) |
||||
{ |
||||
if (varianceEncodedInTarget) |
||||
{ |
||||
// variance is encoded in target, we simply need to add the offset
|
||||
// predictions.
|
||||
decodeBBox->set_xmin(priorBBox.xmin() + bbox.xmin()); |
||||
decodeBBox->set_ymin(priorBBox.ymin() + bbox.ymin()); |
||||
decodeBBox->set_xmax(priorBBox.xmax() + bbox.xmax()); |
||||
decodeBBox->set_ymax(priorBBox.ymax() + bbox.ymax()); |
||||
} |
||||
else |
||||
{ |
||||
// variance is encoded in bbox, we need to scale the offset accordingly.
|
||||
decodeBBox->set_xmin( |
||||
priorBBox.xmin() + priorVariance[0] * bbox.xmin()); |
||||
decodeBBox->set_ymin( |
||||
priorBBox.ymin() + priorVariance[1] * bbox.ymin()); |
||||
decodeBBox->set_xmax( |
||||
priorBBox.xmax() + priorVariance[2] * bbox.xmax()); |
||||
decodeBBox->set_ymax( |
||||
priorBBox.ymax() + priorVariance[3] * bbox.ymax()); |
||||
} |
||||
} |
||||
else |
||||
if (codeType == caffe::PriorBoxParameter_CodeType_CENTER_SIZE) |
||||
{ |
||||
float priorWidth = priorBBox.xmax() - priorBBox.xmin(); |
||||
CV_Assert(priorWidth > 0); |
||||
|
||||
float priorHeight = priorBBox.ymax() - priorBBox.ymin(); |
||||
CV_Assert(priorHeight > 0); |
||||
|
||||
float priorCenterX = (priorBBox.xmin() + priorBBox.xmax()) / 2.; |
||||
float priorCenterY = (priorBBox.ymin() + priorBBox.ymax()) / 2.; |
||||
|
||||
float decodeBBoxCenterX, decodeBBoxCenterY; |
||||
float decodeBBoxWidth, decodeBBoxHeight; |
||||
if (varianceEncodedInTarget) |
||||
{ |
||||
// variance is encoded in target, we simply need to retore the offset
|
||||
// predictions.
|
||||
decodeBBoxCenterX = bbox.xmin() * priorWidth + priorCenterX; |
||||
decodeBBoxCenterY = bbox.ymin() * priorHeight + priorCenterY; |
||||
decodeBBoxWidth = exp(bbox.xmax()) * priorWidth; |
||||
decodeBBoxHeight = exp(bbox.ymax()) * priorHeight; |
||||
} |
||||
else |
||||
{ |
||||
// variance is encoded in bbox, we need to scale the offset accordingly.
|
||||
decodeBBoxCenterX = |
||||
priorVariance[0] * bbox.xmin() * priorWidth + priorCenterX; |
||||
decodeBBoxCenterY = |
||||
priorVariance[1] * bbox.ymin() * priorHeight + priorCenterY; |
||||
decodeBBoxWidth = |
||||
exp(priorVariance[2] * bbox.xmax()) * priorWidth; |
||||
decodeBBoxHeight = |
||||
exp(priorVariance[3] * bbox.ymax()) * priorHeight; |
||||
} |
||||
|
||||
decodeBBox->set_xmin(decodeBBoxCenterX - decodeBBoxWidth / 2.); |
||||
decodeBBox->set_ymin(decodeBBoxCenterY - decodeBBoxHeight / 2.); |
||||
decodeBBox->set_xmax(decodeBBoxCenterX + decodeBBoxWidth / 2.); |
||||
decodeBBox->set_ymax(decodeBBoxCenterY + decodeBBoxHeight / 2.); |
||||
} |
||||
else |
||||
{ |
||||
CV_Error(Error::StsBadArg, "Unknown LocLossType."); |
||||
} |
||||
float bboxSize = BBoxSize(*decodeBBox); |
||||
decodeBBox->set_size(bboxSize); |
||||
} |
||||
|
||||
void DetectionOutputLayer::DecodeBBoxes( |
||||
const std::vector<caffe::NormalizedBBox>& priorBBoxes, |
||||
const std::vector<std::vector<float> >& priorVariances, |
||||
const CodeType codeType, const bool varianceEncodedInTarget, |
||||
const std::vector<caffe::NormalizedBBox>& bboxes, |
||||
std::vector<caffe::NormalizedBBox>* decodeBBoxes) |
||||
{ |
||||
CV_Assert(priorBBoxes.size() == priorVariances.size()); |
||||
CV_Assert(priorBBoxes.size() == bboxes.size()); |
||||
int numBBoxes = priorBBoxes.size(); |
||||
if (numBBoxes >= 1) |
||||
{ |
||||
CV_Assert(priorVariances[0].size() == 4); |
||||
} |
||||
decodeBBoxes->clear(); |
||||
for (int i = 0; i < numBBoxes; ++i) |
||||
{ |
||||
caffe::NormalizedBBox decodeBBox; |
||||
DecodeBBox(priorBBoxes[i], priorVariances[i], codeType, |
||||
varianceEncodedInTarget, bboxes[i], &decodeBBox); |
||||
decodeBBoxes->push_back(decodeBBox); |
||||
} |
||||
} |
||||
|
||||
void DetectionOutputLayer::DecodeBBoxesAll( |
||||
const std::vector<LabelBBox>& allLocPreds, |
||||
const std::vector<caffe::NormalizedBBox>& priorBBoxes, |
||||
const std::vector<std::vector<float> >& priorVariances, |
||||
const size_t num, const bool shareLocation, |
||||
const int numLocClasses, const int backgroundLabelId, |
||||
const CodeType codeType, const bool varianceEncodedInTarget, |
||||
std::vector<LabelBBox>* allDecodeBBoxes) |
||||
{ |
||||
CV_Assert(allLocPreds.size() == num); |
||||
allDecodeBBoxes->clear(); |
||||
allDecodeBBoxes->resize(num); |
||||
for (size_t i = 0; i < num; ++i) |
||||
{ |
||||
// Decode predictions into bboxes.
|
||||
LabelBBox& decodeBBoxes = (*allDecodeBBoxes)[i]; |
||||
for (int c = 0; c < numLocClasses; ++c) |
||||
{ |
||||
int label = shareLocation ? -1 : c; |
||||
if (label == backgroundLabelId) |
||||
{ |
||||
// Ignore background class.
|
||||
continue; |
||||
} |
||||
if (allLocPreds[i].find(label) == allLocPreds[i].end()) |
||||
{ |
||||
// Something bad happened if there are no predictions for current label.
|
||||
util::make_error<int>("Could not find location predictions for label ", label); |
||||
} |
||||
const std::vector<caffe::NormalizedBBox>& labelLocPreds = |
||||
allLocPreds[i].find(label)->second; |
||||
DecodeBBoxes(priorBBoxes, priorVariances, |
||||
codeType, varianceEncodedInTarget, |
||||
labelLocPreds, &(decodeBBoxes[label])); |
||||
} |
||||
} |
||||
} |
||||
|
||||
void DetectionOutputLayer::GetPriorBBoxes(const float* priorData, const int& numPriors, |
||||
std::vector<caffe::NormalizedBBox>* priorBBoxes, |
||||
std::vector<std::vector<float> >* priorVariances) |
||||
{ |
||||
priorBBoxes->clear(); |
||||
priorVariances->clear(); |
||||
for (int i = 0; i < numPriors; ++i) |
||||
{ |
||||
int startIdx = i * 4; |
||||
caffe::NormalizedBBox bbox; |
||||
bbox.set_xmin(priorData[startIdx]); |
||||
bbox.set_ymin(priorData[startIdx + 1]); |
||||
bbox.set_xmax(priorData[startIdx + 2]); |
||||
bbox.set_ymax(priorData[startIdx + 3]); |
||||
float bboxSize = BBoxSize(bbox); |
||||
bbox.set_size(bboxSize); |
||||
priorBBoxes->push_back(bbox); |
||||
} |
||||
|
||||
for (int i = 0; i < numPriors; ++i) |
||||
{ |
||||
int startIdx = (numPriors + i) * 4; |
||||
std::vector<float> var; |
||||
for (int j = 0; j < 4; ++j) |
||||
{ |
||||
var.push_back(priorData[startIdx + j]); |
||||
} |
||||
priorVariances->push_back(var); |
||||
} |
||||
} |
||||
|
||||
// Writes into scaleBBox a copy of bbox whose x coordinates are multiplied by
// `width` and whose y coordinates are multiplied by `height`.
// The size field is recomputed with BBoxSize; the box is treated as
// normalized only when neither width nor height exceeds 1. The `difficult`
// flag is copied from the input box.
void DetectionOutputLayer::ScaleBBox(const caffe::NormalizedBBox& bbox,
                                     const int height, const int width,
                                     caffe::NormalizedBBox* scaleBBox)
{
    scaleBBox->set_xmin(bbox.xmin() * width);
    scaleBBox->set_ymin(bbox.ymin() * height);
    scaleBBox->set_xmax(bbox.xmax() * width);
    scaleBBox->set_ymax(bbox.ymax() * height);

    // Drop any stale size before recomputing it from the scaled corners.
    scaleBBox->clear_size();
    const bool staysNormalized = (width <= 1) && (height <= 1);
    const float scaledSize = BBoxSize(*scaleBBox, staysNormalized);
    scaleBBox->set_size(scaledSize);

    scaleBBox->set_difficult(bbox.difficult());
}
||||
|
||||
|
||||
void DetectionOutputLayer::GetLocPredictions( |
||||
const float* locData, const int num, |
||||
const int numPredsPerClass, const int numLocClasses, |
||||
const bool shareLocation, std::vector<LabelBBox>* locPreds) |
||||
{ |
||||
locPreds->clear(); |
||||
if (shareLocation) |
||||
{ |
||||
CV_Assert(numLocClasses == 1); |
||||
} |
||||
locPreds->resize(num); |
||||
for (int i = 0; i < num; ++i) |
||||
{ |
||||
LabelBBox& labelBBox = (*locPreds)[i]; |
||||
for (int p = 0; p < numPredsPerClass; ++p) |
||||
{ |
||||
int startIdx = p * numLocClasses * 4; |
||||
for (int c = 0; c < numLocClasses; ++c) |
||||
{ |
||||
int label = shareLocation ? -1 : c; |
||||
if (labelBBox.find(label) == labelBBox.end()) |
||||
{ |
||||
labelBBox[label].resize(numPredsPerClass); |
||||
} |
||||
labelBBox[label][p].set_xmin(locData[startIdx + c * 4]); |
||||
labelBBox[label][p].set_ymin(locData[startIdx + c * 4 + 1]); |
||||
labelBBox[label][p].set_xmax(locData[startIdx + c * 4 + 2]); |
||||
labelBBox[label][p].set_ymax(locData[startIdx + c * 4 + 3]); |
||||
} |
||||
} |
||||
locData += numPredsPerClass * numLocClasses * 4; |
||||
} |
||||
} |
||||
|
||||
// Unpacks confidence scores from a flat float array.
//
// confData is read as `num` consecutive images, each holding
// numPredsPerClass rows of numClasses scores. confPreds is reset to `num`
// maps; in each map, class index c maps to the scores of class c in
// prediction order.
void DetectionOutputLayer::GetConfidenceScores(
    const float* confData, const int num,
    const int numPredsPerClass, const int numClasses,
    std::vector<std::map<int, std::vector<float> > >* confPreds)
{
    confPreds->clear();
    confPreds->resize(num);

    const int scoresPerImage = numPredsPerClass * numClasses;
    for (int image = 0; image < num; ++image)
    {
        std::map<int, std::vector<float> >& perClass = (*confPreds)[image];
        for (int pred = 0; pred < numPredsPerClass; ++pred)
        {
            const float* row = confData + pred * numClasses;
            for (int cls = 0; cls < numClasses; ++cls)
            {
                perClass[cls].push_back(row[cls]);
            }
        }
        // Advance to the next image's block of scores.
        confData += scoresPerImage;
    }
}
||||
|
||||
// Greedy non-maximum suppression.
//
// bboxes / scores:  candidate boxes and their confidences (same length).
// score_threshold:  forwarded to GetMaxScoreIndex to select candidates.
// nms_threshold:    a candidate is dropped when its JaccardOverlap with any
//                   already kept box is greater than this value.
// top_k:            forwarded to GetMaxScoreIndex to limit the candidates.
// indices:          cleared, then filled with the indices (into bboxes) of
//                   the kept boxes, in the candidate order produced by
//                   GetMaxScoreIndex.
void DetectionOutputLayer::ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
                                        const std::vector<float>& scores,
                                        const float score_threshold,
                                        const float nms_threshold, const int top_k,
                                        std::vector<int>* indices)
{
    // Sanity check.
    CV_Assert(bboxes.size() == scores.size());

    // Get top_k scores (with corresponding indices).
    std::vector<std::pair<float, int> > score_index_vec;
    GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec);

    // Do nms. Candidates are walked by position instead of being erased from
    // the front of the vector, which shifted every remaining element on each
    // step; the visiting order and the result are unchanged.
    indices->clear();
    for (size_t candidate = 0; candidate < score_index_vec.size(); ++candidate)
    {
        const int idx = score_index_vec[candidate].second;

        // Stop comparing as soon as one kept box overlaps too much.
        bool keep = true;
        for (size_t k = 0; keep && k < indices->size(); ++k)
        {
            const int kept_idx = (*indices)[k];
            const float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
            keep = overlap <= nms_threshold;
        }

        if (keep)
        {
            indices->push_back(idx);
        }
    }
}
||||
|
||||
|
||||
void DetectionOutputLayer::GetMaxScoreIndex( |
||||
const std::vector<float>& scores, const float threshold,const int top_k, |
||||
std::vector<std::pair<float, int> >* score_index_vec) |
||||
{ |
||||
// Generate index score pairs.
|
||||
for (size_t i = 0; i < scores.size(); ++i) |
||||
{ |
||||
if (scores[i] > threshold) |
||||
{ |
||||
score_index_vec->push_back(std::make_pair(scores[i], i)); |
||||
} |
||||
} |
||||
|
||||
// Sort the score pair according to the scores in descending order
|
||||
std::stable_sort(score_index_vec->begin(), score_index_vec->end(), |
||||
util::SortScorePairDescend<int>); |
||||
|
||||
// Keep top_k scores if needed.
|
||||
if (top_k > -1 && top_k < (int)score_index_vec->size()) |
||||
{ |
||||
score_index_vec->resize(top_k); |
||||
} |
||||
} |
||||
|
||||
// Stores the intersection rectangle of bbox1 and bbox2 in intersect_bbox.
// When the boxes do not overlap on either axis, all four coordinates are
// set to 0.
void DetectionOutputLayer::IntersectBBox(const caffe::NormalizedBBox& bbox1,
                                         const caffe::NormalizedBBox& bbox2,
                                         caffe::NormalizedBBox* intersect_bbox) {
    const bool apartInX = bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin();
    const bool apartInY = bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin();

    if (apartInX || apartInY)
    {
        // No intersection: report the degenerate box [0, 0, 0, 0].
        intersect_bbox->set_xmin(0);
        intersect_bbox->set_ymin(0);
        intersect_bbox->set_xmax(0);
        intersect_bbox->set_ymax(0);
        return;
    }

    // Overlap: innermost of each pair of edges.
    intersect_bbox->set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
    intersect_bbox->set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
    intersect_bbox->set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
    intersect_bbox->set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
}
||||
|
||||
// Computes the Jaccard overlap (intersection over union) of two boxes.
//
// normalized: when true the intersection extent is xmax - xmin / ymax - ymin;
//             when false 1 is added to each extent.
// Returns intersection / (size1 + size2 - intersection), or 0 when the
// intersection has a non-positive width or height.
//
// The `normalized` flag is now also forwarded to BBoxSize, so the two box
// areas are requested under the same convention as the intersection area.
// Previously BBoxSize was always called with its default (normalized = true)
// even when the intersection used the +1 convention. Calls that rely on the
// default normalized = true behave exactly as before.
float DetectionOutputLayer::JaccardOverlap(const caffe::NormalizedBBox& bbox1,
                                           const caffe::NormalizedBBox& bbox2,
                                           const bool normalized) {
    caffe::NormalizedBBox intersect_bbox;
    IntersectBBox(bbox1, bbox2, &intersect_bbox);

    float intersect_width, intersect_height;
    if (normalized)
    {
        intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin();
        intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin();
    }
    else
    {
        intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin() + 1;
        intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin() + 1;
    }

    if (intersect_width > 0 && intersect_height > 0)
    {
        float intersect_size = intersect_width * intersect_height;
        // Use the same convention for the union terms as for the intersection.
        float bbox1_size = BBoxSize(bbox1, normalized);
        float bbox2_size = BBoxSize(bbox2, normalized);
        return intersect_size / (bbox1_size + bbox2_size - intersect_size);
    }
    else
    {
        return 0.;
    }
}
||||
|
||||
} |
||||
} |
@ -0,0 +1,226 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__ |
||||
#define __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__ |
||||
|
||||
#include "../precomp.hpp" |
||||
#include "caffe.pb.h" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
class DetectionOutputLayer : public Layer |
||||
{ |
||||
unsigned _numClasses; |
||||
bool _shareLocation; |
||||
int _numLocClasses; |
||||
|
||||
int _backgroundLabelId; |
||||
|
||||
typedef caffe::PriorBoxParameter_CodeType CodeType; |
||||
CodeType _codeType; |
||||
|
||||
bool _varianceEncodedInTarget; |
||||
int _keepTopK; |
||||
float _confidenceThreshold; |
||||
|
||||
int _num; |
||||
int _numPriors; |
||||
|
||||
float _nmsThreshold; |
||||
int _topK; |
||||
|
||||
static const size_t _numAxes = 4; |
||||
static const std::string _layerName; |
||||
|
||||
public: |
||||
DetectionOutputLayer(LayerParams ¶ms); |
||||
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
|
||||
void checkInputs(const std::vector<Blob*> &inputs); |
||||
void getCodeType(LayerParams ¶ms); |
||||
|
||||
template<typename T> |
||||
T getParameter(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
const size_t &idx = 0, |
||||
const bool required = true, |
||||
const T& defaultValue = T()); |
||||
|
||||
bool getParameterDict(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
DictValue& result); |
||||
|
||||
typedef std::map<int, std::vector<caffe::NormalizedBBox> > LabelBBox; |
||||
|
||||
// Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1].
|
||||
void ClipBBox(const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* clip_bbox); |
||||
|
||||
// Decode a bbox according to a prior bbox.
|
||||
void DecodeBBox(const caffe::NormalizedBBox& prior_bbox, |
||||
const std::vector<float>& prior_variance, const CodeType code_type, |
||||
const bool variance_encoded_in_target, const caffe::NormalizedBBox& bbox, |
||||
caffe::NormalizedBBox* decode_bbox); |
||||
|
||||
// Decode a set of bboxes according to a set of prior bboxes.
|
||||
void DecodeBBoxes(const std::vector<caffe::NormalizedBBox>& prior_bboxes, |
||||
const std::vector<std::vector<float> >& prior_variances, |
||||
const CodeType code_type, const bool variance_encoded_in_target, |
||||
const std::vector<caffe::NormalizedBBox>& bboxes, |
||||
std::vector<caffe::NormalizedBBox>* decode_bboxes); |
||||
|
||||
// Decode all bboxes in a batch.
|
||||
void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_pred, |
||||
const std::vector<caffe::NormalizedBBox>& prior_bboxes, |
||||
const std::vector<std::vector<float> >& prior_variances, |
||||
const size_t num, const bool share_location, |
||||
const int num_loc_classes, const int background_label_id, |
||||
const CodeType code_type, const bool variance_encoded_in_target, |
||||
std::vector<LabelBBox>* all_decode_bboxes); |
||||
|
||||
// Get prior bounding boxes from prior_data.
|
||||
// prior_data: 1 x 2 x num_priors * 4 x 1 blob.
|
||||
// num_priors: number of priors.
|
||||
// prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox.
|
||||
// prior_variances: stores all the variances needed by prior bboxes.
|
||||
void GetPriorBBoxes(const float* priorData, const int& numPriors, |
||||
std::vector<caffe::NormalizedBBox>* priorBBoxes, |
||||
std::vector<std::vector<float> >* priorVariances); |
||||
|
||||
// Scale the caffe::NormalizedBBox w.r.t. height and width.
|
||||
void ScaleBBox(const caffe::NormalizedBBox& bbox, const int height, const int width, |
||||
caffe::NormalizedBBox* scale_bbox); |
||||
|
||||
// Do non maximum suppression given bboxes and scores.
|
||||
// Inspired by Piotr Dollar's NMS implementation in EdgeBox.
|
||||
// https://goo.gl/jV3JYS
|
||||
// bboxes: a set of bounding boxes.
|
||||
// scores: a set of corresponding confidences.
|
||||
// score_threshold: a threshold used to filter detection results.
|
||||
// nms_threshold: a threshold used in non maximum suppression.
|
||||
// top_k: if not -1, keep at most top_k picked indices.
|
||||
// indices: the kept indices of bboxes after nms.
|
||||
void ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes, |
||||
const std::vector<float>& scores, const float score_threshold, |
||||
const float nms_threshold, const int top_k, std::vector<int>* indices); |
||||
|
||||
|
||||
// Do non maximum suppression given bboxes and scores.
|
||||
// bboxes: a set of bounding boxes.
|
||||
// scores: a set of corresponding confidences.
|
||||
// threshold: the threshold used in non maximu suppression.
|
||||
// top_k: if not -1, keep at most top_k picked indices.
|
||||
// reuse_overlaps: if true, use and update overlaps; otherwise, always
|
||||
// compute overlap.
|
||||
// overlaps: a temp place to optionally store the overlaps between pairs of
|
||||
// bboxes if reuse_overlaps is true.
|
||||
// indices: the kept indices of bboxes after nms.
|
||||
void ApplyNMS(const std::vector<caffe::NormalizedBBox>& bboxes, |
||||
const std::vector<float>& scores, |
||||
const float threshold, const int top_k, const bool reuse_overlaps, |
||||
std::map<int, std::map<int, float> >* overlaps, std::vector<int>* indices); |
||||
|
||||
void ApplyNMS(const bool* overlapped, const int num, std::vector<int>* indices); |
||||
|
||||
// Get confidence predictions from conf_data.
|
||||
// conf_data: num x num_preds_per_class * num_classes blob.
|
||||
// num: the number of images.
|
||||
// num_preds_per_class: number of predictions per class.
|
||||
// num_classes: number of classes.
|
||||
// conf_preds: stores the confidence prediction, where each item contains
|
||||
// confidence prediction for an image.
|
||||
void GetConfidenceScores(const float* conf_data, const int num, |
||||
const int num_preds_per_class, const int num_classes, |
||||
std::vector<std::map<int, std::vector<float> > >* conf_scores); |
||||
|
||||
// Get confidence predictions from conf_data.
|
||||
// conf_data: num x num_preds_per_class * num_classes blob.
|
||||
// num: the number of images.
|
||||
// num_preds_per_class: number of predictions per class.
|
||||
// num_classes: number of classes.
|
||||
// class_major: if true, data layout is
|
||||
// num x num_classes x num_preds_per_class; otherwise, data layerout is
|
||||
// num x num_preds_per_class * num_classes.
|
||||
// conf_preds: stores the confidence prediction, where each item contains
|
||||
// confidence prediction for an image.
|
||||
void GetConfidenceScores(const float* conf_data, const int num, |
||||
const int num_preds_per_class, const int num_classes, |
||||
const bool class_major, |
||||
std::vector<std::map<int, std::vector<float> > >* conf_scores); |
||||
|
||||
// Get location predictions from loc_data.
|
||||
// loc_data: num x num_preds_per_class * num_loc_classes * 4 blob.
|
||||
// num: the number of images.
|
||||
// num_preds_per_class: number of predictions per class.
|
||||
// num_loc_classes: number of location classes. It is 1 if share_location is
|
||||
// true; and is equal to number of classes needed to predict otherwise.
|
||||
// share_location: if true, all classes share the same location prediction.
|
||||
// loc_preds: stores the location prediction, where each item contains
|
||||
// location prediction for an image.
|
||||
void GetLocPredictions(const float* loc_data, const int num, |
||||
const int num_preds_per_class, const int num_loc_classes, |
||||
const bool share_location, std::vector<LabelBBox>* loc_preds); |
||||
|
||||
// Get max scores with corresponding indices.
|
||||
// scores: a set of scores.
|
||||
// threshold: only consider scores higher than the threshold.
|
||||
// top_k: if -1, keep all; otherwise, keep at most top_k.
|
||||
// score_index_vec: store the sorted (score, index) pair.
|
||||
void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold, |
||||
const int top_k, std::vector<std::pair<float, int> >* score_index_vec); |
||||
|
||||
// Compute the jaccard (intersection over union IoU) overlap between two bboxes.
|
||||
float JaccardOverlap(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2, |
||||
const bool normalized = true); |
||||
|
||||
// Compute the intersection between two bboxes.
|
||||
void IntersectBBox(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2, |
||||
caffe::NormalizedBBox* intersect_bbox); |
||||
|
||||
// Compute bbox size.
|
||||
float BBoxSize(const caffe::NormalizedBBox& bbox, const bool normalized = true); |
||||
}; |
||||
} |
||||
} |
||||
#endif |
@ -0,0 +1,117 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp" |
||||
#include "layers_common.hpp" |
||||
#include "flatten_layer.hpp" |
||||
#include <float.h> |
||||
#include <algorithm> |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
|
||||
// Reads the flatten range from the layer parameters:
//   "axis"     - first axis to flatten (default 1);
//   "end_axis" - last axis to flatten (default -1).
// _numAxes is value-initialized to 0 so that it never holds an indeterminate
// value before allocate() assigns the real number of axes.
FlattenLayer::FlattenLayer(LayerParams &params) : Layer(params), _numAxes(0)
{
    _startAxis = params.get<int>("axis", 1);
    _endAxis = params.get<int>("end_axis", -1);
}
||||
|
||||
// Verifies that there is at least one input and that every input has the
// same number of axes and the same extent along each axis as the first one.
//
// The axis count is taken from the first input rather than from _numAxes:
// allocate() calls this function before it assigns _numAxes, so the member
// cannot be relied upon here.
void FlattenLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    CV_Assert(inputs.size() > 0);

    const size_t numAxes = inputs[0]->dims();
    for (size_t i = 1; i < inputs.size(); i++)
    {
        // Inputs with a different rank cannot share the first input's shape.
        CV_Assert(inputs[i]->dims() == inputs[0]->dims());
        for (size_t j = 0; j < numAxes; j++)
        {
            CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);
        }
    }
}
||||
|
||||
// Computes the flattened output shape from the first input and binds every
// output to its input's data, reshaped to that shape.
//
// Axes [_startAxis, _endAxis] are collapsed into a single axis whose extent
// is the product of their sizes; axes before and after the range are kept.
// The resulting shape may have at most 4 axes.
void FlattenLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    checkInputs(inputs);

    Blob &first = *inputs[0];
    _numAxes = first.dims();
    _endAxis = first.canonicalAxis(_endAxis);
    CV_Assert(_startAxis >= 0);
    CV_Assert(_endAxis >= _startAxis && _endAxis < (int)_numAxes);

    std::vector<int> outputShapeVec;

    // Axes in front of the flattened range are copied unchanged.
    for (int axis = 0; axis < _startAxis; axis++)
    {
        outputShapeVec.push_back(first.size(axis));
    }

    // The flattened range collapses into one axis.
    size_t flattenedDimensionSize = 1;
    for (int axis = _startAxis; axis <= _endAxis; axis++)
    {
        flattenedDimensionSize *= first.size(axis);
    }
    outputShapeVec.push_back(flattenedDimensionSize);

    // Axes behind the flattened range are copied unchanged.
    for (size_t axis = _endAxis + 1; axis < _numAxes; axis++)
    {
        outputShapeVec.push_back(first.size(axis));
    }
    CV_Assert(outputShapeVec.size() <= 4);

    resultShape = BlobShape(outputShapeVec);

    // In-place: outputs share the input data and only differ in shape.
    const size_t inputCount = inputs.size();
    for (size_t idx = 0; idx < inputCount; idx++)
    {
        Blob &out = outputs[idx];
        out.shareFrom(*inputs[idx]);
        out.reshape(resultShape);
    }
}
||||
|
||||
// Rebinds every output to its input's data and applies the shape computed in
// allocate().
void FlattenLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    const size_t inputCount = inputs.size();
    for (size_t idx = 0; idx < inputCount; idx++)
    {
        Blob &out = outputs[idx];
        out.shareFrom(*inputs[idx]);
        out.reshape(resultShape);
    }
}
||||
} |
||||
} |
@ -0,0 +1,67 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__ |
||||
#define __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__ |
||||
#include "../precomp.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
class FlattenLayer : public Layer |
||||
{ |
||||
int _startAxis; |
||||
int _endAxis; |
||||
size_t _numAxes; |
||||
|
||||
BlobShape resultShape; |
||||
|
||||
public: |
||||
FlattenLayer(LayerParams ¶ms); |
||||
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
|
||||
void checkInputs(const std::vector<Blob*> &inputs); |
||||
}; |
||||
} |
||||
} |
||||
#endif |
@ -0,0 +1,201 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp" |
||||
#include "layers_common.hpp" |
||||
#include "normalize_bbox_layer.hpp" |
||||
#include "op_blas.hpp" |
||||
|
||||
#include <float.h> |
||||
#include <algorithm> |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
|
||||
// Layer name used as the prefix of parameter error messages.
const std::string NormalizeBBoxLayer::_layerName("NormalizeBBox");
||||
|
||||
bool NormalizeBBoxLayer::getParameterDict(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
DictValue& result) |
||||
{ |
||||
if (!params.has(parameterName)) |
||||
{ |
||||
return false; |
||||
} |
||||
|
||||
result = params.get(parameterName); |
||||
return true; |
||||
} |
||||
|
||||
template<typename T> |
||||
T NormalizeBBoxLayer::getParameter(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
const size_t &idx, |
||||
const bool required, |
||||
const T& defaultValue) |
||||
{ |
||||
DictValue dictValue; |
||||
bool success = getParameterDict(params, parameterName, dictValue); |
||||
if(!success) |
||||
{ |
||||
if(required) |
||||
{ |
||||
std::string message = _layerName; |
||||
message += " layer parameter does not contain "; |
||||
message += parameterName; |
||||
message += " parameter."; |
||||
CV_Error(Error::StsBadArg, message); |
||||
} |
||||
else |
||||
{ |
||||
return defaultValue; |
||||
} |
||||
} |
||||
return dictValue.get<T>(idx); |
||||
} |
||||
|
||||
// Reads the layer configuration:
//   "eps"            - optional, defaults to 1e-10f;
//   "across_spatial" - required;
//   "channel_shared" - required.
NormalizeBBoxLayer::NormalizeBBoxLayer(LayerParams &params) : Layer(params)
{
    const bool isRequired = false;
    const float defaultEps = 1e-10f;

    _eps = getParameter<float>(params, "eps", 0, isRequired, defaultEps);
    _across_spatial = getParameter<bool>(params, "across_spatial");
    _channel_shared = getParameter<bool>(params, "channel_shared");
}
||||
|
||||
// Validates the input blobs before allocation.
// Requirements enforced (each via CV_Assert):
//   * at least one input is present;
//   * every input has at least _numAxes (4) axes, because the comparison
//     below and allocate() index shape axes 0..3;
//   * every additional input matches input 0 on those axes.
// The rank check now runs before any axis is indexed; previously it ran last
// and only required more than 2 axes.
void NormalizeBBoxLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    CV_Assert(inputs.size() > 0);

    const int requiredAxes = static_cast<int>(_numAxes);
    for (size_t i = 0; i < inputs.size(); i++)
    {
        CV_Assert(inputs[i]->dims() >= requiredAxes);
    }

    for (size_t i = 1; i < inputs.size(); i++)
    {
        for (size_t j = 0; j < _numAxes; j++)
        {
            CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);
        }
    }
}
||||
|
||||
// Caches the input geometry, prepares the helper matrices used by forward()
// and creates one output blob per input, each shaped like input 0
// (checkInputs() has verified that all inputs agree on the first four axes).
void NormalizeBBoxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    checkInputs(inputs);

    // The scale factors are taken from the first learned blob; fail with a
    // clear assertion instead of indexing an empty container.
    CV_Assert(blobs.size() > 0);

    _num = inputs[0]->num();
    _channels = inputs[0]->shape()[1];
    _rows = inputs[0]->shape()[2];
    _cols = inputs[0]->shape()[3];

    _channelSize = _rows * _cols;
    _imageSize = _channelSize * _channels;

    // Scratch matrix, one row per channel and one column per spatial position.
    _buffer = Mat(_channels, _channelSize, CV_32F);

    // All-ones vectors used to sum over / broadcast along an axis via GEMM.
    _sumChannelMultiplier = Mat(_channels, 1, CV_32F, Scalar(1.0));
    _sumSpatialMultiplier = Mat(1, _channelSize, CV_32F, Scalar(1.0));

    _scale = blobs[0];

    // Make sure there is an output slot for every input before indexing them.
    outputs.resize(inputs.size());
    for (size_t i = 0; i < inputs.size(); i++)
    {
        outputs[i].create(BlobShape(inputs[0]->shape()));
    }
}
||||
|
||||
// L2-normalises every image of every input and multiplies by the scale.
//   * _across_spatial: one norm per image, sqrt(sum of all squares + _eps).
//   * otherwise:       one norm per spatial position, taken over the channels,
//                      sqrt(sum over channels of squares + _eps).
// The result is then scaled by a single value (_channel_shared) or by one
// value per channel.
// Changes versus the previous revision: _eps is now also added in the
// per-position branch (an all-zero column used to divide by zero), and the
// absdiff against a zero matrix was dropped because the summed values are
// squares and therefore already non-negative.
void NormalizeBBoxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    for (size_t j = 0; j < inputs.size(); j++)
    {
        for (size_t n = 0; n < _num; ++n)
        {
            // Views over image n: _channels rows, _channelSize columns.
            Mat src = Mat(_channels, _channelSize, CV_32F, inputs[j]->ptrf(n));
            Mat dst = Mat(_channels, _channelSize, CV_32F, outputs[j].ptrf(n));

            // Element-wise squares of the input.
            _buffer = src.mul(src);

            if (_across_spatial)
            {
                // _eps keeps the divisor away from zero.
                double squareSum = sum(_buffer)[0] + _eps;

                float norm = sqrt(squareSum);
                dst = src / norm;
            }
            else
            {
                Mat norm(_channelSize, 1, _buffer.type()); // _channelSize x 1

                // (_channels x _channelSize)T * _channels x 1 -> _channelSize x 1
                gemmCPU(_buffer, _sumChannelMultiplier, 1, norm, 0, GEMM_1_T);

                // _eps keeps the divisor away from zero, as in the branch above.
                norm += Scalar(_eps);

                // compute norm
                pow(norm, 0.5f, norm);

                // broadcast the per-position norm to every channel:
                // _channels x 1 * (_channelSize x 1)T -> _channels x _channelSize
                gemmCPU(_sumChannelMultiplier, norm, 1, _buffer, 0, GEMM_2_T);

                dst = src / _buffer;
            }

            // scale the output
            if (_channel_shared)
            {
                // _scale: 1 x 1
                dst *= _scale.matRefConst().at<float>(0, 0);
            }
            else
            {
                // _scale: _channels x 1
                // _channels x 1 * 1 x _channelSize -> _channels x _channelSize
                gemmCPU(_scale.matRefConst(), _sumSpatialMultiplier, 1, _buffer, 0);

                dst = dst.mul(_buffer);
            }
        }
    }
}
||||
} |
||||
} |
@ -0,0 +1,94 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__ |
||||
#define __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__ |
||||
#include "../precomp.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
class NormalizeBBoxLayer : public Layer |
||||
{ |
||||
Mat _buffer; |
||||
|
||||
Mat _sumChannelMultiplier; |
||||
Mat _sumSpatialMultiplier; |
||||
|
||||
Blob _scale; |
||||
|
||||
float _eps; |
||||
bool _across_spatial; |
||||
bool _channel_shared; |
||||
|
||||
size_t _num; |
||||
size_t _channels; |
||||
size_t _rows; |
||||
size_t _cols; |
||||
|
||||
size_t _channelSize; |
||||
size_t _imageSize; |
||||
|
||||
static const size_t _numAxes = 4; |
||||
static const std::string _layerName; |
||||
|
||||
public: |
||||
NormalizeBBoxLayer(LayerParams ¶ms); |
||||
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
|
||||
void checkInputs(const std::vector<Blob*> &inputs); |
||||
|
||||
template<typename T> |
||||
T getParameter(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
const size_t &idx = 0, |
||||
const bool required = true, |
||||
const T& defaultValue = T()); |
||||
|
||||
bool getParameterDict(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
DictValue& result); |
||||
}; |
||||
} |
||||
} |
||||
#endif |
@ -0,0 +1,185 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp" |
||||
#include "layers_common.hpp" |
||||
#include "permute_layer.hpp" |
||||
#include <float.h> |
||||
#include <algorithm> |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
void PermuteLayer::checkCurrentOrder(int currentOrder) |
||||
{ |
||||
if(currentOrder < 0 || currentOrder > 3) |
||||
{ |
||||
CV_Error( |
||||
Error::StsBadArg, |
||||
"Orders of dimensions in Permute layer parameter" |
||||
"must be in [0...3] interval"); |
||||
} |
||||
|
||||
if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end()) |
||||
{ |
||||
CV_Error(Error::StsBadArg, |
||||
"Permute layer parameter contains duplicated orders."); |
||||
} |
||||
} |
||||
|
||||
void PermuteLayer::checkNeedForPermutation() |
||||
{ |
||||
_needsPermute = false; |
||||
for (size_t i = 0; i < _numAxes; ++i) |
||||
{ |
||||
if (_order[i] != i) |
||||
{ |
||||
_needsPermute = true; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Reads the optional "order" parameter (at most 4 entries, each validated by
// checkCurrentOrder) and decides whether any permutation is needed.
// Without an "order" parameter the layer acts as a pass-through.
PermuteLayer::PermuteLayer(LayerParams &params) : Layer(params)
{
    // Give the scalar members defined values on every path; the early return
    // below used to leave _numAxes and _count indeterminate.
    _count = 0;
    _numAxes = 0;
    _needsPermute = false;

    if (!params.has("order"))
    {
        return;
    }

    DictValue paramOrder = params.get("order");
    if(paramOrder.size() > 4)
    {
        CV_Error(
            Error::StsBadArg,
            "Too many (> 4) orders of dimensions in Permute layer");
    }

    _numAxes = paramOrder.size();

    for (size_t i = 0; i < _numAxes; i++)
    {
        int currentOrder = paramOrder.get<int>(i);
        checkCurrentOrder(currentOrder);
        _order.push_back(currentOrder);
    }

    checkNeedForPermutation();
}
||||
|
||||
void PermuteLayer::computeStrides() |
||||
{ |
||||
_oldStride.resize(_numAxes); |
||||
_newStride.resize(_numAxes); |
||||
|
||||
_oldStride[_numAxes - 1] = 1; |
||||
_newStride[_numAxes - 1] = 1; |
||||
|
||||
for(int i = _numAxes - 2; i >= 0; i--) |
||||
{ |
||||
_oldStride[i] = _oldStride[i + 1] * _oldDimensionSize[i + 1]; |
||||
_newStride[i] = _newStride[i + 1] * _newDimensionSize[i + 1]; |
||||
} |
||||
|
||||
_count = _oldStride[0] * _oldDimensionSize[0]; |
||||
} |
||||
|
||||
// Sizes and shapes the outputs and precomputes the strides used by forward().
//   * Identity order: every output gets the shape of its input.
//   * Otherwise: the permuted shape is built from input 0 and every input
//     must match input 0 on all _numAxes axes, because _count and the
//     strides are derived from input 0 only.
void PermuteLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    if(!_needsPermute)
    {
        // forward() indexes outputs[j] for every input, so the vector must be
        // large enough. The outputs are also given their final shape here.
        // NOTE(review): presumably consumers inspect output shapes while they
        // allocate - confirm against the network allocation code.
        outputs.resize(inputs.size());
        for (size_t i = 0; i < inputs.size(); i++)
        {
            outputs[i].create(BlobShape(inputs[i]->shape()));
        }
        return;
    }

    CV_Assert(inputs.size() > 0);
    CV_Assert((int)_numAxes == inputs[0]->shape().dims());

    outputs.resize(inputs.size());

    _oldDimensionSize = inputs[0]->shape();
    // Start from a copy so the permuted shape has exactly the input's rank
    // before its axes are overwritten one by one.
    _newDimensionSize = _oldDimensionSize;
    for (size_t i = 0; i < _numAxes; i++)
    {
        // Each source axis must exist in the input.
        CV_Assert(_order[i] < _numAxes);
        _newDimensionSize[i] = _oldDimensionSize[_order[i]];
    }

    for (size_t i = 0; i < inputs.size(); i++)
    {
        CV_Assert((int)_numAxes == inputs[i]->shape().dims());
        for (size_t j = 0; j < _numAxes; j++)
        {
            CV_Assert(inputs[i]->shape()[j] == _oldDimensionSize[j]);
        }
        outputs[i].create(BlobShape(_newDimensionSize));
    }

    computeStrides();
}
||||
|
||||
// Writes the permuted copy of every input into the matching output.
// With an identity order each output matrix is simply assigned from its
// input. Otherwise, for every destination offset the per-axis coordinates
// are recovered with the new strides and mapped back to a source offset with
// the old strides of the permuted axes.
void PermuteLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    // Both paths index outputs[k] for every input.
    CV_Assert(outputs.size() >= inputs.size());

    if(!_needsPermute)
    {
        for (size_t j = 0; j < inputs.size(); j++)
        {
            outputs[j].matRef() = inputs[j]->matRef();
        }
        return;
    }

    for (size_t k = 0; k < inputs.size(); k++)
    {
        const float *srcData = inputs[k]->ptrf();
        float *dstData = outputs[k].ptrf();

        for (size_t i = 0; i < _count; ++i)
        {
            // Offsets are kept in size_t, the type of _count and the strides,
            // instead of being narrowed to int.
            size_t oldPosition = 0;
            size_t newPosition = i;

            for (size_t j = 0; j < _numAxes; ++j)
            {
                // newPosition / _newStride[j] is the coordinate along output
                // axis j, which corresponds to input axis _order[j].
                oldPosition += (newPosition / _newStride[j]) * _oldStride[_order[j]];
                newPosition %= _newStride[j];
            }
            dstData[i] = srcData[oldPosition];
        }
    }
}
||||
} |
||||
} |
@ -0,0 +1,75 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__ |
||||
#define __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__ |
||||
#include "../precomp.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
class PermuteLayer : public Layer |
||||
{ |
||||
size_t _count; |
||||
std::vector<size_t> _order; |
||||
|
||||
BlobShape _oldDimensionSize; |
||||
BlobShape _newDimensionSize; |
||||
|
||||
std::vector<size_t> _oldStride; |
||||
std::vector<size_t> _newStride; |
||||
bool _needsPermute; |
||||
|
||||
size_t _numAxes; |
||||
|
||||
void checkCurrentOrder(int currentOrder); |
||||
void checkNeedForPermutation(); |
||||
void computeStrides(); |
||||
|
||||
public: |
||||
PermuteLayer(LayerParams ¶ms); |
||||
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
}; |
||||
} |
||||
} |
||||
#endif |
@ -0,0 +1,307 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "../precomp.hpp" |
||||
#include "layers_common.hpp" |
||||
#include "prior_box_layer.hpp" |
||||
#include <float.h> |
||||
#include <algorithm> |
||||
#include <cmath> |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
|
||||
// Layer type name; getParameter() prefixes its error messages with it.
const std::string PriorBoxLayer::_layerName("PriorBox");
||||
|
||||
bool PriorBoxLayer::getParameterDict(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
DictValue& result) |
||||
{ |
||||
if (!params.has(parameterName)) |
||||
{ |
||||
return false; |
||||
} |
||||
|
||||
result = params.get(parameterName); |
||||
return true; |
||||
} |
||||
|
||||
template<typename T> |
||||
T PriorBoxLayer::getParameter(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
const size_t &idx, |
||||
const bool required, |
||||
const T& defaultValue) |
||||
{ |
||||
DictValue dictValue; |
||||
bool success = getParameterDict(params, parameterName, dictValue); |
||||
if(!success) |
||||
{ |
||||
if(required) |
||||
{ |
||||
std::string message = _layerName; |
||||
message += " layer parameter does not contain "; |
||||
message += parameterName; |
||||
message += " parameter."; |
||||
CV_Error(Error::StsBadArg, message); |
||||
} |
||||
else |
||||
{ |
||||
return defaultValue; |
||||
} |
||||
} |
||||
return dictValue.get<T>(idx); |
||||
} |
||||
|
||||
void PriorBoxLayer::getAspectRatios(const LayerParams ¶ms) |
||||
{ |
||||
DictValue aspectRatioParameter; |
||||
bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter); |
||||
CV_Assert(aspectRatioRetieved); |
||||
|
||||
for (int i = 0; i < aspectRatioParameter.size(); ++i) |
||||
{ |
||||
float aspectRatio = aspectRatioParameter.get<float>(i); |
||||
bool alreadyExists = false; |
||||
|
||||
for (size_t j = 0; j < _aspectRatios.size(); ++j) |
||||
{ |
||||
if (fabs(aspectRatio - _aspectRatios[j]) < 1e-6) |
||||
{ |
||||
alreadyExists = true; |
||||
break; |
||||
} |
||||
} |
||||
if (!alreadyExists) |
||||
{ |
||||
_aspectRatios.push_back(aspectRatio); |
||||
if (_flip) |
||||
{ |
||||
_aspectRatios.push_back(1./aspectRatio); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
void PriorBoxLayer::getVariance(const LayerParams ¶ms) |
||||
{ |
||||
DictValue varianceParameter; |
||||
bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter); |
||||
CV_Assert(varianceParameterRetrieved); |
||||
|
||||
int varianceSize = varianceParameter.size(); |
||||
if (varianceSize > 1) |
||||
{ |
||||
// Must and only provide 4 variance.
|
||||
CV_Assert(varianceSize == 4); |
||||
|
||||
for (int i = 0; i < varianceSize; ++i) |
||||
{ |
||||
float variance = varianceParameter.get<float>(i); |
||||
CV_Assert(variance > 0); |
||||
_variance.push_back(variance); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
if (varianceSize == 1) |
||||
{ |
||||
float variance = varianceParameter.get<float>(0); |
||||
CV_Assert(variance > 0); |
||||
_variance.push_back(variance); |
||||
} |
||||
else |
||||
{ |
||||
// Set default to 0.1.
|
||||
_variance.push_back(0.1f); |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Reads the layer configuration: mandatory "min_size", "flip", "clip",
// "aspect_ratio" and "variance"; optional "max_size".
// _numPriors ends up as the number of stored aspect ratios (ratio 1 is always
// present), plus one when "max_size" is given.
PriorBoxLayer::PriorBoxLayer(LayerParams &params) : Layer(params)
{
    _minSize = getParameter<unsigned>(params, "min_size");
    CV_Assert(_minSize > 0);

    _flip = getParameter<bool>(params, "flip");
    _clip = getParameter<bool>(params, "clip");

    // Start from a list that holds only aspect ratio 1.
    _aspectRatios.assign(static_cast<size_t>(1), 1.f);

    getAspectRatios(params);
    getVariance(params);

    // A negative _maxSize marks "no max_size given" for forward().
    const bool hasMaxSize = params.has("max_size");
    _maxSize = -1;
    if (hasMaxSize)
    {
        _maxSize = params.get("max_size").get<float>(0);
        CV_Assert(_maxSize > _minSize);
    }

    _numPriors = _aspectRatios.size() + (hasMaxSize ? 1 : 0);
}
||||
|
||||
// Expects exactly two inputs: input 0 supplies the layer (feature-map) width
// and height, input 1 the image width and height. Creates a single
// zero-filled output of shape 1 x 2 x (height * width * priors * 4).
void PriorBoxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 2);

    Blob *layerBlob = inputs[0];
    Blob *imageBlob = inputs[1];

    _layerWidth = layerBlob->cols();
    _layerHeight = layerBlob->rows();

    _imageWidth = imageBlob->cols();
    _imageHeight = imageBlob->rows();

    // Image pixels covered by one feature-map cell along each direction.
    _stepX = static_cast<float>(_imageWidth) / _layerWidth;
    _stepY = static_cast<float>(_imageHeight) / _layerHeight;

    // All images of a batch share height and width, so a single set of priors
    // is generated and shared across the batch.
    const size_t outNum = 1;
    // Channel 0 holds the prior coordinates, channel 1 their variances.
    const size_t outChannels = 2;
    _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;

    Blob &priors = outputs[0];
    priors.create(BlobShape(outNum, outChannels, _outChannelSize));
    priors.matRef() = 0;
}
||||
|
||||
// Writes one box as [xmin, ymin, xmax, ymax], each divided by the image
// extent, starting at dst[pos]. Returns the position after the last value.
static int appendNormalizedBox(float* dst, int pos,
                               float centerX, float centerY,
                               float boxWidth, float boxHeight,
                               size_t imageWidth, size_t imageHeight)
{
    dst[pos++] = (centerX - boxWidth / 2.) / imageWidth;    // xmin
    dst[pos++] = (centerY - boxHeight / 2.) / imageHeight;  // ymin
    dst[pos++] = (centerX + boxWidth / 2.) / imageWidth;    // xmax
    dst[pos++] = (centerY + boxHeight / 2.) / imageHeight;  // ymax
    return pos;
}

// Fills the output blob: the first channel receives the prior boxes of every
// feature-map cell, the second channel the variances.
// Per cell the boxes are emitted in this order:
//   1. aspect ratio 1, size min_size;
//   2. aspect ratio 1, size sqrt(min_size * max_size)  (only with max_size);
//   3. one box per stored aspect ratio other than 1.
void PriorBoxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    (void)inputs; // to suppress unused parameter warning

    float* outputPtr = outputs[0].ptrf();

    int idx = 0;
    for (size_t h = 0; h < _layerHeight; ++h)
    {
        for (size_t w = 0; w < _layerWidth; ++w)
        {
            // Centre of the cell in image coordinates.
            const float center_x = (w + 0.5) * _stepX;
            const float center_y = (h + 0.5) * _stepY;

            // first prior: aspect_ratio = 1, size = min_size
            _boxWidth = _boxHeight = _minSize;
            idx = appendNormalizedBox(outputPtr, idx, center_x, center_y,
                                      _boxWidth, _boxHeight, _imageWidth, _imageHeight);

            if (_maxSize > 0)
            {
                // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
                _boxWidth = _boxHeight = sqrt(_minSize * _maxSize);
                idx = appendNormalizedBox(outputPtr, idx, center_x, center_y,
                                          _boxWidth, _boxHeight, _imageWidth, _imageHeight);
            }

            // rest of priors
            for (size_t r = 0; r < _aspectRatios.size(); ++r)
            {
                float ar = _aspectRatios[r];
                if (fabs(ar - 1.) < 1e-6)
                {
                    // ratio 1 was already emitted above
                    continue;
                }
                _boxWidth = _minSize * sqrt(ar);
                _boxHeight = _minSize / sqrt(ar);
                idx = appendNormalizedBox(outputPtr, idx, center_x, center_y,
                                          _boxWidth, _boxHeight, _imageWidth, _imageHeight);
            }
        }
    }

    // clip the prior's coordinate such that it is within [0, 1]
    if (_clip)
    {
        for (size_t d = 0; d < _outChannelSize; ++d)
        {
            outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
        }
    }

    // set the variance: switch to the second channel of the output.
    outputPtr = outputs[0].ptrf(0, 1);
    if(_variance.size() == 1)
    {
        // One variance for everything: fill the whole channel with it.
        Mat secondChannel(outputs[0].rows(), outputs[0].cols(), CV_32F, outputPtr);
        secondChannel.setTo(Scalar(_variance[0]));
    }
    else
    {
        // Four variances: repeat them once per generated box.
        const size_t boxCount = _layerHeight * _layerWidth * _numPriors;
        float* variancePtr = outputPtr;
        for (size_t b = 0; b < boxCount; ++b)
        {
            for (int j = 0; j < 4; ++j)
            {
                *variancePtr++ = _variance[j];
            }
        }
    }
}
||||
} |
||||
} |
@ -0,0 +1,101 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__ |
||||
#define __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__ |
||||
#include "../precomp.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
class PriorBoxLayer : public Layer |
||||
{ |
||||
size_t _layerWidth; |
||||
size_t _layerHeight; |
||||
|
||||
size_t _imageWidth; |
||||
size_t _imageHeight; |
||||
|
||||
size_t _outChannelSize; |
||||
|
||||
float _stepX; |
||||
float _stepY; |
||||
|
||||
float _minSize; |
||||
float _maxSize; |
||||
|
||||
float _boxWidth; |
||||
float _boxHeight; |
||||
|
||||
std::vector<float> _aspectRatios; |
||||
std::vector<float> _variance; |
||||
|
||||
bool _flip; |
||||
bool _clip; |
||||
|
||||
size_t _numPriors; |
||||
|
||||
static const size_t _numAxes = 4; |
||||
static const std::string _layerName; |
||||
|
||||
public: |
||||
PriorBoxLayer(LayerParams ¶ms); |
||||
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); |
||||
|
||||
template<typename T> |
||||
T getParameter(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
const size_t &idx = 0, |
||||
const bool required = true, |
||||
const T& defaultValue = T()); |
||||
|
||||
bool getParameterDict(const LayerParams ¶ms, |
||||
const std::string ¶meterName, |
||||
DictValue& result); |
||||
|
||||
void getAspectRatios(const LayerParams ¶ms); |
||||
void getVariance(const LayerParams ¶ms); |
||||
}; |
||||
} |
||||
} |
||||
#endif |
Loading…
Reference in new issue