diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 7e7a47199..8b7cdfbbb 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -29,6 +29,10 @@ else() ) endif() +if(ANDROID) + add_definitions(-DDISABLE_POSIX_MEMALIGN -DTH_DISABLE_HEAP_TRACKING) +endif() + # ---------------------------------------------------------------------------- # Resolve libprotobuf dependency # ---------------------------------------------------------------------------- @@ -55,7 +59,7 @@ endif() # ---------------------------------------------------------------------------- # Torch7 importer of blobs and models, produced by Torch.nn module # ---------------------------------------------------------------------------- -OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer (experimental functionality!)" OFF) +OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer" ON) if(${the_module}_BUILD_TORCH_IMPORTER) add_definitions(-DENABLE_TORCH_IMPORTER=1) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4702 /wd4127 /wd4267) #supress warnings in original torch files diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index ace63ddfe..9d26b35e7 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -209,7 +209,7 @@ namespace dnn { public: - CV_PROP_RW Size kernel, stride, pad, dilation; + CV_PROP_RW Size kernel, stride, pad, dilation, adjustPad; CV_PROP_RW String padMode; }; @@ -224,7 +224,7 @@ namespace dnn { public: - static CV_WRAP Ptr create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1)); + static CV_WRAP Ptr create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1), Size adjustPad = Size()); }; class CV_EXPORTS_W LRNLayer : public Layer @@ -341,6 +341,12 @@ namespace dnn static CV_WRAP Ptr create(double negativeSlope = 0); }; + class CV_EXPORTS_W ChannelsPReLULayer : public Layer + { + public: + static CV_WRAP Ptr create(); + }; + class CV_EXPORTS_W TanHLayer : public Layer { public: @@ -397,6 +403,18 @@ namespace dnn static Ptr create(EltwiseOp op, const std::vector &coeffs); }; + class CV_EXPORTS_W BatchNormLayer : public Layer + { + public: + static CV_WRAP Ptr create(float eps, bool has_weights, bool has_bias); + }; + + class CV_EXPORTS_W MaxUnpoolLayer : public Layer + { + public: + static CV_WRAP Ptr create(Size unpoolSize); + }; + //! @} //! @} diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index ca4a6abd5..41d975bdf 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -270,6 +270,9 @@ namespace dnn //! This namespace is used for dnn module functionlaity. */ CV_WRAP Blob getParam(LayerId layer, int numParam = 0); + /** @brief Returns indexes of layers with unconnected outputs. + */ + CV_WRAP std::vector getUnconnectedOutLayers() const; private: struct Impl; diff --git a/modules/dnn/samples/torch_enet.cpp b/modules/dnn/samples/torch_enet.cpp new file mode 100644 index 000000000..74939a26a --- /dev/null +++ b/modules/dnn/samples/torch_enet.cpp @@ -0,0 +1,176 @@ +/* +Sample of using OpenCV dnn module with Torch ENet model. 
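+
+Example invocation (a sketch; the sample's binary name depends on how the samples are built and
+is an assumption here, the option keys are the ones defined below):
+
+    ./torch_enet -m=model_best.net -i=street.png -c=categories.txt -o_blob=<output blob name>
+
+If the output blob name is not known in advance, Net::getUnconnectedOutLayers() (added by this
+patch) can help to locate the last layer of the imported network.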
+*/
+
+#include <opencv2/dnn.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+using namespace cv;
+using namespace cv::dnn;
+
+#include <fstream>
+#include <iostream>
+#include <cstdlib>
+#include <sstream>
+using namespace std;
+
+const String keys =
+    "{help h || Sample app for loading ENet Torch model. "
+    "The model and class names list can be downloaded here: "
+    "https://www.dropbox.com/sh/dywzk3gyb12hpe5/AAD5YkUa8XgMpHs2gCRgmCVCa }"
+    "{model m || path to Torch .net model file (model_best.net) }"
+    "{image i || path to image file }"
+    "{i_blob | .0 | input blob name }"
+    "{o_blob || output blob name }"
+    "{c_names c || path to file with classnames for channels (categories.txt) }"
+    "{result r || path to save output blob (optional, binary format, NCHW order) }"
+    ;
+
+std::vector<String> readClassNames(const char *filename);
+
+int main(int argc, char **argv)
+{
+    cv::CommandLineParser parser(argc, argv, keys);
+
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    String modelFile = parser.get<String>("model");
+    String imageFile = parser.get<String>("image");
+    String inBlobName = parser.get<String>("i_blob");
+    String outBlobName = parser.get<String>("o_blob");
+
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 0;
+    }
+
+    String classNamesFile = parser.get<String>("c_names");
+    String resultFile = parser.get<String>("result");
+
+    //! [Create the importer of Torch model]
+    Ptr<Importer> importer;
+    try //Try to import Torch ENet model
+    {
+        importer = dnn::createTorchImporter(modelFile);
+    }
+    catch (const cv::Exception &err) //Importer can throw errors, we will catch them
+    {
+        std::cerr << err.msg << std::endl;
+    }
+    //! [Create the importer of Torch model]
+
+    if (!importer)
+    {
+        std::cerr << "Can't load network by using the model file: " << std::endl;
+        std::cerr << modelFile << std::endl;
+        exit(-1);
+    }
+
+    //! [Initialize network]
+    dnn::Net net;
+    importer->populateNet(net);
+    importer.release(); //We don't need importer anymore
+    //! [Initialize network]
+
+    //! [Prepare blob]
+    Mat img = imread(imageFile);
+    if (img.empty())
+    {
+        std::cerr << "Can't read image from the file: " << imageFile << std::endl;
+        exit(-1);
+    }
+
+    cv::Size inputImgSize = cv::Size(512, 512);
+
+    if (inputImgSize != img.size())
+        resize(img, img, inputImgSize); //Resize image to input size
+
+    if(img.channels() == 3)
+        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+
+    img.convertTo(img, CV_32F, 1/255.0);
+
+    dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob image batch
+    //! [Prepare blob]
+
+    //! [Set input blob]
+    net.setBlob(inBlobName, inputBlob); //set the network input
+    //! [Set input blob]
+
+    cv::TickMeter tm;
+    tm.start();
+
+    //! [Make forward pass]
+    net.forward(); //compute output
+    //! [Make forward pass]
+
+    tm.stop();
+
+    //! 
[Gather output] + dnn::Blob prob = net.getBlob(outBlobName); //gather output of "prob" layer + + Mat& result = prob.matRef(); + + BlobShape shape = prob.shape(); + + if (!resultFile.empty()) { + CV_Assert(result.isContinuous()); + + ofstream fout(resultFile.c_str(), ios::out | ios::binary); + fout.write((char*)result.data, result.total() * sizeof(float)); + fout.close(); + } + + std::cout << "Output blob shape " << shape << std::endl; + std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl; + + std::vector classNames; + if(!classNamesFile.empty()) { + classNames = readClassNames(classNamesFile.c_str()); + if (classNames.size() > prob.channels()) + classNames = std::vector(classNames.begin() + classNames.size() - prob.channels(), + classNames.end()); + } + + for(int i_c = 0; i_c < prob.channels(); i_c++) { + ostringstream convert; + convert << "Channel #" << i_c; + + if(classNames.size() == prob.channels()) + convert << ": " << classNames[i_c]; + + imshow(convert.str().c_str(), prob.getPlane(0, i_c)); + } + waitKey(); + + return 0; +} //main + + +std::vector readClassNames(const char *filename) +{ + std::vector classNames; + + std::ifstream fp(filename); + if (!fp.is_open()) + { + std::cerr << "File with classes labels not found: " << filename << std::endl; + exit(-1); + } + + std::string name; + while (!fp.eof()) + { + std::getline(fp, name); + if (name.length()) + classNames.push_back(name); + } + + fp.close(); + return classNames; +} diff --git a/modules/dnn/src/caffe/layer_loaders.cpp b/modules/dnn/src/caffe/layer_loaders.cpp index 40a7e567a..8b3372003 100644 --- a/modules/dnn/src/caffe/layer_loaders.cpp +++ b/modules/dnn/src/caffe/layer_loaders.cpp @@ -23,6 +23,9 @@ static void initConvDeconvLayerFromCaffe(Ptr l, LayerParam int numOutput = params.get("num_output"); int group = params.get("group", 1); + l->adjustPad.height = params.get("adj_h", 0); + l->adjustPad.width = params.get("adj_w", 0); + CV_Assert(numOutput % group == 0); CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1)); } @@ -40,6 +43,7 @@ Ptr createLayerFromCaffe(LayerParams ¶ms) { Ptr l = DeconvolutionLayer::create(); initConvDeconvLayerFromCaffe(l, params); + return Ptr(l); } @@ -248,7 +252,7 @@ Ptr createLayerFromCaffe(LayerParams& params) return Ptr(CropLayer::create(start_axis, offset)); } -template<> //Power specialization +template<> //Eltwise specialization Ptr createLayerFromCaffe(LayerParams& params) { EltwiseLayer::EltwiseOp op = EltwiseLayer::SUM; @@ -278,6 +282,42 @@ Ptr createLayerFromCaffe(LayerParams& params) return Ptr(EltwiseLayer::create(op, coeffs)); } +template<> //BatchNormLayer specialization +Ptr createLayerFromCaffe(LayerParams& params) +{ + const std::vector &blobs = params.blobs; + CV_Assert(blobs.size() == 4); + + float eps = params.get("eps"); + bool hasWeights = params.get("has_weight", false); + bool hasBias = params.get("has_bias", false); + + Ptr l = BatchNormLayer::create(eps, hasWeights, hasBias); + l->setParamsFrom(params); + + return Ptr(l); +} + +template<> //ChannelsPReLULayer specialization +Ptr createLayerFromCaffe(LayerParams& params) +{ + CV_Assert(params.blobs.size() == 1); + Ptr l = ChannelsPReLULayer::create(); + l->setParamsFrom(params); + + return Ptr(l); +} + +template<> //MaxUnpoolLayer specialization +Ptr createLayerFromCaffe(LayerParams& params) +{ + Size outSize(params.get("out_w"), + params.get("out_h")); + Ptr l = MaxUnpoolLayer::create(outSize); + + return Ptr(l); +} + //Explicit instantiation template Ptr 
createLayerFromCaffe(LayerParams&); template Ptr createLayerFromCaffe(LayerParams&); @@ -299,6 +339,9 @@ template Ptr createLayerFromCaffe(LayerParams&); template Ptr createLayerFromCaffe(LayerParams&); template Ptr createLayerFromCaffe(LayerParams&); +template Ptr createLayerFromCaffe(LayerParams&); +template Ptr createLayerFromCaffe(LayerParams&); +template Ptr createLayerFromCaffe(LayerParams&); } } diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 5c009272f..9d9748df8 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -592,6 +592,24 @@ bool Net::empty() const return impl->layers.size() <= 1; //first layer is default Data layer } +std::vector Net::getUnconnectedOutLayers() const +{ + std::vector layersIds; + + Impl::MapIdToLayerData::iterator it; + for (it = impl->layers.begin(); it != impl->layers.end(); it++) + { + int lid = it->first; + LayerData &ld = it->second; + + if (ld.requiredOutputs.size() == 0) + layersIds.push_back(lid); + } + + return layersIds; +} + + ////////////////////////////////////////////////////////////////////////// Importer::~Importer() {} diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 5b675bdef..c10b21e62 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -51,6 +51,7 @@ #include "layers/detection_output_layer.hpp" #include "layers/normalize_bbox_layer.hpp" #include "layers/shift_layer.hpp" +#include "layers/padding_layer.hpp" namespace cv { @@ -89,11 +90,14 @@ void initModule() REG_RUNTIME_LAYER_FUNC(MVN, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe); + REG_RUNTIME_LAYER_FUNC(ChannelsPReLU, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(TanH, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe); REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe); + REG_RUNTIME_LAYER_FUNC(BatchNorm, createLayerFromCaffe); + REG_RUNTIME_LAYER_FUNC(MaxUnpool, createLayerFromCaffe); REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer); REG_RUNTIME_LAYER_CLASS(Identity, BlankLayer); @@ -104,6 +108,7 @@ void initModule() REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer); REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer); REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer); + REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer); init.status = true; } diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp new file mode 100644 index 000000000..e2a651d66 --- /dev/null +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Implementation of Batch Normalization layer. 
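+
+At inference time the layer applies, per channel c (a sketch of the math implemented in
+forward() below):
+
+    y_c = (x_c - mean_c) / sqrt(var_c + eps) * w_c + b_c
+
+where blobs[0] and blobs[1] hold the running mean and variance, and the optional blobs[2] and
+blobs[3] hold the scale (weight) and shift (bias) applied when has_weights/has_bias are set.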
+*/ + +#include "batch_norm_layer.hpp" + +namespace cv +{ +namespace dnn +{ + +BatchNormLayerImpl::BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_): + eps(eps_), + hasWeights(hasWeights_), + hasBias(hasBias_) +{} + +void BatchNormLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +{ + CV_Assert(blobs.size() == 4); + + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + outputs[i].create(inputs[i]->shape()); + } +} + +void BatchNormLayerImpl::forward(std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 1); + + Blob &inpBlob = *inputs[0]; + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Blob &outBlob = outputs[ii]; + + if (hasWeights) + CV_Assert(inpBlob.channels() == blobs[2].total()); + + if (hasBias) + CV_Assert(inpBlob.channels() == blobs[3].total()); + + for (int n = 0; n < inpBlob.channels(); n++) + { + float mean = blobs[0].matRefConst().at(n); + float invstd = 1 / sqrt(blobs[1].matRefConst().at(n) + eps); + float w = hasWeights ? blobs[2].matRefConst().at(n) : 1; + float b = hasBias ? blobs[3].matRefConst().at(n) : 0; + outBlob.getPlane(0, n) = (inpBlob.getPlane(0, n) - mean)*(w*invstd) + b; + } + } +} + +Ptr BatchNormLayer::create(float eps, bool has_weights, bool has_bias) +{ + return Ptr(new BatchNormLayerImpl(eps, has_weights, has_bias)); +} + +} // namespace dnn +} // namespace cv diff --git a/modules/dnn/src/layers/batch_norm_layer.hpp b/modules/dnn/src/layers/batch_norm_layer.hpp new file mode 100644 index 000000000..ebc69a336 --- /dev/null +++ b/modules/dnn/src/layers/batch_norm_layer.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Declaration of Batch Normalization layer. 
+*/ + +#ifndef __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__ +#define __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__ +#include + +namespace cv +{ +namespace dnn +{ + +class BatchNormLayerImpl : public BatchNormLayer +{ +public: + BatchNormLayerImpl(float eps_, bool hasWeights_, bool hasBias_); + + void allocate(const std::vector &inputs, std::vector &outputs); + + void forward(std::vector &inputs, std::vector &outputs); + +private: + float eps; + bool hasWeights, hasBias; +}; + +} +} +#endif // BATCH_NORM_LAYER_HPP diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index d10df0131..8a3dafabd 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -53,12 +53,14 @@ namespace cv namespace dnn { -ConvolutionLayerImpl::ConvolutionLayerImpl() +BaseConvolutionLayerImpl::BaseConvolutionLayerImpl(): + numOutput(-1), group(-1), + inpH(0), inpW(0), inpCn(0), + outH(0), outW(0), outCn(0), + inpGroupCn(0), outGroupCn(0), + ksize(0), colBlobCols(0), + bias(false), tryUseOpenCL(false) { - tryUseOpenCL = false; //true; - numOutput = -1; - group = -1; - #if HAVE_CBLAS if (getBlasThreads() != cv::getThreadNum()) { @@ -67,37 +69,23 @@ ConvolutionLayerImpl::ConvolutionLayerImpl() #endif } -void ConvolutionLayerImpl::init() +void BaseConvolutionLayerImpl::init() { - CV_Assert(1 <= blobs.size() && blobs.size() <= 2); - - bias = (blobs.size() >= 2); - numOutput = blobs[0].num(); - + CV_Assert(blobs.size() >= 1 && blobs.size() <= 2); CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height); - CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num()); - //TODO: dilation in OCL mode + bias = (blobs.size() >= 2); useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1); } -void ConvolutionLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +void BaseConvolutionLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) { + CV_Assert(inputs.size() > 0); + init(); - CV_Assert(inputs.size() > 0); const Blob &input = *inputs[0]; CV_Assert(input.dims() == 4 && (input.type() == CV_32F || input.type() == CV_64F)); - computeInpOutShape(input); - - group = inpCn / blobs[0].channels(); - CV_Assert(inpCn % group == 0 && outCn % group == 0); - CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group); - - outGroupCn = outCn / group; - inpGroupCn = inpCn / group; - ksize = inpGroupCn * kernel.height * kernel.width; - for (size_t i = 0; i < inputs.size(); i++) { CV_Assert(inputs[i]->type() == input.type()); @@ -105,36 +93,73 @@ void ConvolutionLayerImpl::allocate(const std::vector &inputs, std::vecto CV_Assert(inputs[i]->rows() == input.rows() && inputs[i]->cols() == input.cols()); } - int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT; + computeInpOutShape(input); - if (!is1x1()) - { - colBlob.create(Shape(ksize, outH * outW), input.type(), allocFlags); - } + int allocFlags = useOpenCL ? 
Blob::ALLOC_UMAT : Blob::ALLOC_MAT; if (bias) { - biasOnesBlob.create(Shape(1, topH * topW), input.type(), allocFlags); + biasOnesBlob.create(Shape(1, outH * outW), input.type(), allocFlags); biasOnesBlob.setTo(1); } outputs.resize(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { - outputs[i].create(Shape(inputs[i]->num(), topCn, topH, topW), input.type(), allocFlags); + outputs[i].create(Shape(inputs[i]->num(), outCn, outH, outW), input.type(), allocFlags); + } + + if (!is1x1()) + { + colBlob.create(Shape(ksize, colBlobCols), input.type(), allocFlags); } } -bool ConvolutionLayerImpl::is1x1() const +bool BaseConvolutionLayerImpl::is1x1() const { return (kernel.height == 1 && kernel.width == 1) && (stride.height == 1 && stride.width == 1) && (dilation.height == 1 && dilation.width == 1); } +void ConvolutionLayerImpl::computeInpOutShape(const Blob &input) +{ + CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num()); + + numOutput = blobs[0].num(); + + inpH = input.rows(); + inpW = input.cols(); + inpCn = input.channels(); + outCn = numOutput; + + if (padMode.empty()) + { + outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1; + outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1; + } + else + { + getConvPoolOutParams(inpH, inpW, kernel, stride, pad, padMode, outH, outW); + } + + group = inpCn / blobs[0].channels(); + + CV_Assert(inpCn % group == 0 && outCn % group == 0); + CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group); + + outGroupCn = outCn / group; + inpGroupCn = inpCn / group; + ksize = inpGroupCn * kernel.height * kernel.width; + + colBlobCols = outH * outW; +} + template void ConvolutionLayerImpl::forward_(std::vector &inputs, std::vector &outputs) { + CV_Assert(inputs.size() > 0); + XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(outCn, ksize)); XMat biasesMat = (bias) ? 
reshaped(blobs[1].getRefConst(), Shape(outCn, 1)) : XMat(); @@ -213,44 +238,33 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol) dstCol = colMat; } -void ConvolutionLayerImpl::computeInpOutShape(const Blob &input) -{ - inpH = input.rows(); - inpW = input.cols(); - inpCn = input.channels(); - outCn = numOutput; - - if (padMode.empty()) - { - outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1; - outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1; - } - else - { - getConvPoolOutParams(inpH, inpW, kernel, stride, pad, padMode, outH, outW); - } - - topH = outH; topW = outW; topCn = outCn; -} - //Deconvolution -DeConvolutionLayerImpl::DeConvolutionLayerImpl() +void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob) { + BlobShape bs0 = blobs[0].shape(); + BlobShape bs1 = blobs[1].shape(); + CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].channels()); -} + numOutput = blobs[0].channels(); -void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob) -{ - outH = inpBlob.rows(); - outW = inpBlob.cols(); - outCn = inpBlob.channels(); + inpH = inpBlob.rows(); + inpW = inpBlob.cols(); + inpCn = inpBlob.channels(); - inpH = stride.height * (outH - 1) + kernel.height - 2 * pad.height; - inpW = stride.width * (outW - 1) + kernel.width - 2 * pad.width; - inpCn = numOutput; + outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height; + outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width; + outCn = numOutput; - topH = inpH; topW = inpW; topCn = inpCn; + group = inpCn / blobs[0].num(); + outGroupCn = outCn / group; + inpGroupCn = inpCn / group; + ksize = outGroupCn * kernel.height * kernel.width; + + CV_Assert(inpCn % group == 0 && outCn % group == 0); + CV_Assert(blobs[0].channels() == outCn && blobs[0].num() == inpCn / group); + + colBlobCols = inpH * inpW; } void DeConvolutionLayerImpl::forward(std::vector &inputs, std::vector &outputs) @@ -264,24 +278,24 @@ void DeConvolutionLayerImpl::forward(std::vector &inputs, std::vector void DeConvolutionLayerImpl::forward_(std::vector &inputs, std::vector &outputs) { - XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(outCn, ksize)); + XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(inpCn, ksize)); XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst(), Shape(outCn, 1)) : XMat(); for (size_t ii = 0; ii < outputs.size(); ii++) { int numImg = inputs[ii]->size(0); - XMat convBlob = reshaped(inputs[ii]->getRefConst(), Shape(numImg*outCn, outH*outW)); - XMat decnBlob = reshaped(outputs[ii].getRef(), Shape(numImg*inpCn, inpH*inpW)); + XMat convBlob = reshaped(inputs[ii]->getRefConst(), Shape(numImg*inpCn, inpH*inpW)); + XMat decnBlob = reshaped(outputs[ii].getRef(), Shape(numImg*outCn, outH*outW)); for (int n = 0; n < numImg; n++) { for (int g = 0; g < group; g++) { - XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn)); + XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn)); XMat &colMat = (is1x1()) ? 
dstMat : colBlob.getRef(); - XMat convMat = convBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn)); - XMat wghtMat = weightsMat.rowRange(_Range(g * outGroupCn, outGroupCn)); + XMat convMat = convBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn)); + XMat wghtMat = weightsMat.rowRange(_Range(g * inpGroupCn, inpGroupCn)); dnn::gemm(wghtMat, convMat, 1, colMat, 0, GEMM_1_T); @@ -306,7 +320,7 @@ void DeConvolutionLayerImpl::col2im(const Mat &colMat, Mat &dstImg) return; } if (dstImg.type() == CV_32F) - col2im_CpuPBody::run(colMat.ptr(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr()); + col2im_CpuPBody::run(colMat.ptr(), outGroupCn, outH, outW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr()); if (dstImg.type() == CV_64F) col2im_CpuPBody::run(colMat.ptr(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr()); } @@ -338,13 +352,15 @@ Ptr ConvolutionLayer::create(Size kernel, Size stride, Siz return Ptr(l); } -Ptr DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation) +Ptr DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation, Size adjustPad) { DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl(); l->kernel = kernel; l->pad = pad; l->stride = stride; l->dilation = dilation; + l->adjustPad = adjustPad; + return Ptr(l); } diff --git a/modules/dnn/src/layers/convolution_layer.hpp b/modules/dnn/src/layers/convolution_layer.hpp index 3b59bb638..f9baca2c5 100644 --- a/modules/dnn/src/layers/convolution_layer.hpp +++ b/modules/dnn/src/layers/convolution_layer.hpp @@ -49,30 +49,38 @@ namespace cv namespace dnn { -//TODO: simultaneously convolution and bias addition for cache optimization -class ConvolutionLayerImpl : public ConvolutionLayer +class BaseConvolutionLayerImpl : public ConvolutionLayer { public: - - ConvolutionLayerImpl(); + BaseConvolutionLayerImpl(); virtual void allocate(const std::vector &inputs, std::vector &outputs); - virtual void forward(std::vector &inputs, std::vector &outputs); - virtual void init(); protected: + void init(); + virtual void computeInpOutShape(const Blob &inpBlob) = 0; + bool is1x1() const; + int numOutput, group; int inpH, inpW, inpCn; int outH, outW, outCn; - int topH, topW, topCn; //switched between inp/out on deconv/conv int inpGroupCn, outGroupCn; int ksize; + int colBlobCols; bool bias; bool tryUseOpenCL, useOpenCL; Blob colBlob, biasOnesBlob; - bool is1x1() const; +}; + +//TODO: simultaneously convolution and bias addition for cache optimization +class ConvolutionLayerImpl : public BaseConvolutionLayerImpl +{ +public: + virtual void forward(std::vector &inputs, std::vector &outputs); + +protected: virtual void computeInpOutShape(const Blob &inpBlob); template @@ -81,10 +89,9 @@ protected: void im2col(const UMat &srcImg, UMat &dstCol); }; -class DeConvolutionLayerImpl : public ConvolutionLayerImpl +class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl { public: - DeConvolutionLayerImpl(); virtual void forward(std::vector &inputs, std::vector &outputs); protected: diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 6ed755811..11d772e51 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -1,5 +1,6 @@ #include "../precomp.hpp" #include "elementwise_layers.hpp" +#include 
"opencv2/imgproc.hpp" namespace cv { @@ -42,5 +43,45 @@ Ptr PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, d return Ptr(new ElementWiseLayer(f)); } +//////////////////////////////////////////////////////////////////////////// + +void ChannelsPReLULayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +{ + CV_Assert(blobs.size() == 1); + + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + outputs[i].create(inputs[i]->shape()); + } +} + +void ChannelsPReLULayerImpl::forward(std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 1); + + Blob &inpBlob = *inputs[0]; + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Blob &outBlob = outputs[ii]; + + CV_Assert(blobs[0].total() == inpBlob.channels()); + + for (int n = 0; n < inpBlob.channels(); n++) + { + float slopeWeight = blobs[0].matRefConst().at(n); + + cv::threshold(inpBlob.getPlane(0, n), outBlob.getPlane(0, n), 0, 0, cv::THRESH_TOZERO_INV); + outBlob.getPlane(0, n) = inpBlob.getPlane(0, n) + (slopeWeight - 1)*outBlob.getPlane(0, n); + } + } +} + +Ptr ChannelsPReLULayer::create() +{ + return Ptr(new ChannelsPReLULayerImpl()); +} + +} } -} \ No newline at end of file diff --git a/modules/dnn/src/layers/elementwise_layers.hpp b/modules/dnn/src/layers/elementwise_layers.hpp index 2f67f0ae8..0331433c7 100644 --- a/modules/dnn/src/layers/elementwise_layers.hpp +++ b/modules/dnn/src/layers/elementwise_layers.hpp @@ -313,6 +313,16 @@ struct PowerFunctor #endif }; +class ChannelsPReLULayerImpl : public ChannelsPReLULayer +{ +public: + ChannelsPReLULayerImpl() {} + + void allocate(const std::vector &inputs, std::vector &outputs); + + void forward(std::vector &inputs, std::vector &outputs); +}; + } } #endif diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index ecd09e738..3568ccdd4 100755 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -62,7 +62,8 @@ namespace dnn const BlobShape &shape0 = inputs[0]->shape(); for (size_t i = 1; i < inputs.size(); ++i) { - CV_Assert(shape0 == inputs[i]->shape()); + BlobShape iShape = inputs[i]->shape(); + CV_Assert(shape0 == iShape); } outputs.resize(1); outputs[0].create(shape0); diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp new file mode 100644 index 000000000..67c352c8b --- /dev/null +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Implementation of Batch Normalization layer. 
+*/ + +#include "max_unpooling_layer.hpp" + +namespace cv +{ +namespace dnn +{ + +MaxUnpoolLayerImpl::MaxUnpoolLayerImpl(Size outSize_): + outSize(outSize_) +{} + +void MaxUnpoolLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 2); + + BlobShape outShape = inputs[0]->shape(); + outShape[2] = outSize.height; + outShape[3] = outSize.width; + + outputs.resize(1); + outputs[0].create(outShape); +} + +void MaxUnpoolLayerImpl::forward(std::vector &inputs, std::vector &outputs) +{ + CV_Assert(inputs.size() == 2); + Blob& input = *inputs[0]; + Blob& indices = *inputs[1]; + + CV_Assert(input.total() == indices.total()); + CV_Assert(input.num() == 1); + + for(int i_n = 0; i_n < outputs.size(); i_n++) + { + Blob& outBlob = outputs[i_n]; + CV_Assert(input.channels() == outBlob.channels()); + + for (int i_c = 0; i_c < input.channels(); i_c++) + { + Mat outPlane = outBlob.getPlane(0, i_c); + for(int i_wh = 0; i_wh < input.size2().area(); i_wh++) + { + int index = indices.getPlane(0, i_c).at(i_wh); + + CV_Assert(index < outPlane.total()); + outPlane.at(index) = input.getPlane(0, i_c).at(i_wh); + } + } + } +} + +Ptr MaxUnpoolLayer::create(Size unpoolSize) +{ + return Ptr(new MaxUnpoolLayerImpl(unpoolSize)); +} + +} +} diff --git a/modules/dnn/src/layers/max_unpooling_layer.hpp b/modules/dnn/src/layers/max_unpooling_layer.hpp new file mode 100644 index 000000000..42c546fea --- /dev/null +++ b/modules/dnn/src/layers/max_unpooling_layer.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Declaration of MaxUnpooling layer. +*/ + +#ifndef __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__ +#define __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__ +#include "../precomp.hpp" +#include + +namespace cv +{ +namespace dnn +{ + +class MaxUnpoolLayerImpl : public MaxUnpoolLayer +{ +public: + MaxUnpoolLayerImpl(Size outSize_); + + void allocate(const std::vector &inputs, std::vector &outputs); + + void forward(std::vector &inputs, std::vector &outputs); + +private: + Size outSize; +}; + +} +} +#endif // __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__ diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp new file mode 100644 index 000000000..2b8265aad --- /dev/null +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -0,0 +1,86 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Implementation of padding layer, which adds paddings to input blob. 
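+
+The parameters mirror Torch's nn.Padding: paddingDim selects the axis (shifted by one via
+input_dims when the blob carries an extra batch dimension), padding is the number of new
+elements filled with paddingValue, and index chooses where they are inserted (0 means append
+at the end of that axis). For example (a sketch, not taken from a real model): a 1x64x32x32
+input with padding_dim=1, padding=3, value=0 and index=0 produces a 1x67x32x32 output whose
+three extra channel planes are zero-filled and placed after the original 64 channels.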
+*/ + +#include "padding_layer.hpp" +#include + +namespace cv +{ +namespace dnn +{ + +PaddingLayer::PaddingLayer(LayerParams ¶ms) +{ + paddingDim = params.get("padding_dim"); + padding = abs(params.get("padding")); + inputDims = params.get("input_dims", 0); + index = params.get("index", 0); + paddingValue = params.get("value", 0); + + if(paddingDim < 0 || padding < 0) + CV_Error(cv::Error::StsNotImplemented, "Negative padding and dim aren't supported"); +} + +void PaddingLayer::allocate(const std::vector &inputs, std::vector &outputs) +{ + outputs.resize(inputs.size()); + for(int i = 0; i < inputs.size(); i++) + { + BlobShape shape = inputs[i]->shape(); + int dim = getPadDim(shape); + CV_Assert(dim < shape.dims()); + + shape[dim] += padding; + outputs[i].create(shape); + } +} + +void PaddingLayer::forward(std::vector &inputs, std::vector &outputs) +{ + for(int i = 0; i < inputs.size(); i++) + { + outputs[i].matRef() = paddingValue; + BlobShape inShape = inputs[i]->shape(); + BlobShape outShape = outputs[i].shape(); + int dim = getPadDim(inShape); + + int actualIndex = index; + if(index == 0) + actualIndex = inShape[dim]; + + std::vector > srcDstRanges; + srcDstRanges.push_back(std::make_pair(Range(0, actualIndex), Range(0, actualIndex))); + srcDstRanges.push_back(std::make_pair(Range(actualIndex, inShape[dim]), + Range(actualIndex + padding, outShape[dim]))); + + std::vector srcRanges(inShape.dims(), Range::all()), dstRanges = srcRanges; + + for(int i = 0; i < srcDstRanges.size(); i++) + { + if(!srcDstRanges[i].first.empty()) + { + srcRanges[dim] = srcDstRanges[i].first; + dstRanges[dim] = srcDstRanges[i].second; + Mat dst = outputs[i].matRef()(&dstRanges[0]); + Mat src = inputs[i]->matRef()(&srcRanges[0]).clone(); + src.copyTo(dst); + } + } + } +} + +int PaddingLayer::getPadDim(const BlobShape& shape) const +{ + return inputDims > 0 && shape.dims() > inputDims ? paddingDim + 1 : paddingDim; +} + +} +} diff --git a/modules/dnn/src/layers/padding_layer.hpp b/modules/dnn/src/layers/padding_layer.hpp new file mode 100644 index 000000000..18de09610 --- /dev/null +++ b/modules/dnn/src/layers/padding_layer.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. + +/* +Declaration of padding layer, which adds paddings to input blob. 
+*/ + +#ifndef __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__ +#define __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__ +#include "../precomp.hpp" + +namespace cv +{ +namespace dnn +{ + +class PaddingLayer : public Layer +{ +public: + PaddingLayer() {} + PaddingLayer(LayerParams ¶ms); + void allocate(const std::vector &inputs, std::vector &outputs); + void forward(std::vector &inputs, std::vector &outputs); + +private: + int getPadDim(const BlobShape& shape) const; + int paddingDim, padding, inputDims, index; + float paddingValue; +}; + +} +} +#endif diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 2e184503b..f27df8591 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -72,7 +72,7 @@ PoolingLayerImpl::PoolingLayerImpl(int type_, Size kernel_, Size stride_, Size p void PoolingLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) { - CV_Assert(inputs.size() > 0); + CV_Assert(inputs.size() == 1); inp = inputs[0]->size2(); @@ -85,11 +85,19 @@ void PoolingLayerImpl::allocate(const std::vector &inputs, std::vectorrows() == inp.height && inputs[i]->cols() == inp.width); - outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + if (type == MAX) + { + outputs[2 * i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + outputs[2 * i + 1].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + } + else + { + outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width)); + } } } @@ -100,7 +108,7 @@ void PoolingLayerImpl::forward(std::vector &inputs, std::vector &ou switch (type) { case MAX: - maxPooling(*inputs[ii], outputs[ii]); + maxPooling(*inputs[ii], outputs[2 * ii], outputs[2 * ii + 1]); break; case AVE: avePooling(*inputs[ii], outputs[ii]); @@ -112,17 +120,17 @@ void PoolingLayerImpl::forward(std::vector &inputs, std::vector &ou } } -void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst) +void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst, Blob &mask) { if (!useOpenCL) - maxPooling_cpu(src, dst); + maxPooling_cpu(src, dst, mask); else { - CV_Assert(maxPooling_ocl(src, dst)); + CV_Assert(maxPooling_ocl(src, dst, mask)); } } -bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst) +bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst, Blob &mask) { return pooling_ocl("MaxPoolForward", src, dst); } @@ -142,7 +150,7 @@ bool PoolingLayerImpl::avePooling_ocl(Blob &src, Blob &dst) return pooling_ocl("AvePoolForward", src, dst); } -void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst) +void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst, Blob &mask) { CV_DbgAssert(dst.rows() == out.height && dst.cols() == out.width); @@ -152,6 +160,7 @@ void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst) { const float *srcData = src.ptrf(n, c); float *dstData = dst.ptrf(n, c); + float *dstMaskData = mask.ptrf(n, c); for (int ph = 0; ph < out.height; ++ph) { @@ -165,16 +174,21 @@ void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst) wstart = max(wstart, 0); const int poolIndex = ph * out.width + pw; float max_val = -FLT_MAX; + int max_index = -1; for (int h = hstart; h < hend; ++h) for (int w = wstart; w < wend; ++w) { const int index = h * inp.width + w; if (srcData[index] > max_val) + { max_val = srcData[index]; + max_index = index; + } } dstData[poolIndex] = max_val; + dstMaskData[poolIndex] = max_index; } } } @@ -187,7 +201,9 @@ bool 
PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst { const UMat &srcMat = src.umatRefConst(); UMat &dstMat = dst.umatRef(); - CV_Assert(mask == NULL && srcMat.offset == 0 && dstMat.offset == 0); + UMat* indexesMat = mask == NULL ? NULL : &dst.umatRef(); + + CV_Assert(srcMat.offset == 0 && dstMat.offset == 0); ocl::Kernel ker(kname, ocl::dnn::pooling_oclsrc, String("-DT=") + ocl::typeToStr(src.type())); if (ker.empty()) @@ -199,7 +215,8 @@ bool PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst ocl::KernelArg::PtrReadOnly(srcMat), s[0], s[1], s[2], s[3], out.height, out.width, kernel.height, kernel.width, stride.height, stride.width, pad.height, pad.width, - ocl::KernelArg::PtrWriteOnly(dstMat)); + ocl::KernelArg::PtrWriteOnly(dstMat), + ocl::KernelArg(ocl::KernelArg::PTR_ONLY + ocl::KernelArg::WRITE_ONLY, indexesMat)); size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize(); if (!ker.run(1, &nthreads, &wgSize, true)) diff --git a/modules/dnn/src/layers/pooling_layer.hpp b/modules/dnn/src/layers/pooling_layer.hpp index c5723cd1f..266db1c50 100644 --- a/modules/dnn/src/layers/pooling_layer.hpp +++ b/modules/dnn/src/layers/pooling_layer.hpp @@ -58,9 +58,9 @@ class PoolingLayerImpl : public PoolingLayer bool pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask = NULL); - void maxPooling(Blob &src, Blob &dst); - void maxPooling_cpu(Blob &src, Blob &dst); - bool maxPooling_ocl(Blob &src, Blob &dst); + void maxPooling(Blob &src, Blob &dst, Blob &mask); + void maxPooling_cpu(Blob &src, Blob &dst, Blob &mask); + bool maxPooling_ocl(Blob &src, Blob &dst, Blob &mask); void avePooling(Blob &src, Blob &dst); void avePooling_cpu(Blob &src, Blob &dst); diff --git a/modules/dnn/src/layers/shift_layer.cpp b/modules/dnn/src/layers/shift_layer.cpp index 98bfdfc73..6663640af 100644 --- a/modules/dnn/src/layers/shift_layer.cpp +++ b/modules/dnn/src/layers/shift_layer.cpp @@ -39,17 +39,17 @@ public: virtual void forward(std::vector &inputs, std::vector &outputs, const std::vector& blobs) { for (size_t ii = 0; ii < outputs.size(); ii++) { - Blob &inpBlob = *inputs[ii]; - Blob &outBlob = outputs[ii]; + Blob &inpBlob = *inputs[ii]; + Blob &outBlob = outputs[ii]; - inpBlob.matRef().copyTo(outBlob.matRef()); + inpBlob.matRef().copyTo(outBlob.matRef()); - for (int n = 0; n < inpBlob.num(); n++) - { - Mat dstMat(inpBlob.channels(), inpBlob.rows() * inpBlob.cols(), - outBlob.type(), outBlob.ptr(n)); - dnn::gemm(blobs[0].matRefConst(), biasOnesMat, 1, dstMat, 1); //TODO: gemv - } + for (int n = 0; n < inpBlob.num(); n++) + { + Mat dstMat(inpBlob.channels(), inpBlob.rows() * inpBlob.cols(), + outBlob.type(), outBlob.ptr(n)); + dnn::gemm(blobs[0].matRefConst(), biasOnesMat, 1, dstMat, 1); //TODO: gemv + } } } diff --git a/modules/dnn/src/layers/shift_layer.hpp b/modules/dnn/src/layers/shift_layer.hpp index 1d1c70a85..36808ffbf 100644 --- a/modules/dnn/src/layers/shift_layer.hpp +++ b/modules/dnn/src/layers/shift_layer.hpp @@ -22,13 +22,15 @@ class ShiftLayerImpl; class ShiftLayer : public Layer { - cv::Ptr impl; - public: ShiftLayer() {} ShiftLayer(LayerParams ¶ms); void allocate(const std::vector &inputs, std::vector &outputs); void forward(std::vector &inputs, std::vector &outputs); + +private: + cv::Ptr impl; + }; } diff --git a/modules/dnn/src/opencl/pooling.cl b/modules/dnn/src/opencl/pooling.cl index aeb70bc55..80c96f5ae 100644 --- a/modules/dnn/src/opencl/pooling.cl +++ b/modules/dnn/src/opencl/pooling.cl @@ -24,10 +24,7 @@ * POSSIBILITY OF 
SUCH DAMAGE. **************************************************************************************/ -__kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, __global T* top_data -#ifdef MASK - , __global int* mask, __global T* top_mask -#endif +__kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, __global T* top_data, __global int* mask ) { int index = get_global_id(0); int tmp = get_global_size(0); @@ -55,13 +52,10 @@ __kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const } } top_data[index] = maxval; -#ifdef MASK + if (mask) { mask[index] = maxidx; - } else { - top_mask[index] = maxidx; } -#endif } } diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index f11028f13..64c3ed200 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -45,11 +45,11 @@ #include #include #include +#include namespace cv { namespace dnn { -#if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER #include "THDiskFile.h" #ifdef NDEBUG @@ -91,6 +91,7 @@ static inline bool endsWith(const String &str, const char *substr) struct TorchImporter : public ::cv::dnn::Importer { + typedef std::map > TensorsMap; Net net; THFile *file; @@ -102,16 +103,10 @@ struct TorchImporter : public ::cv::dnn::Importer { String thName, apiType; dnn::LayerParams params; - std::vector modules; + std::vector > modules; Module(const String &_thName, const String &_apiType = String()) : thName(_thName), apiType(_apiType) {} - - ~Module() - { - for (size_t i = 0; i < modules.size(); i++) - delete modules[i]; - } }; Module *rootModule; @@ -184,6 +179,7 @@ struct TorchImporter : public ::cv::dnn::Importer readedIndexes.insert(index); int size = readInt(); + for (int i = 0; i < size; i++) { readObject(); //key @@ -271,7 +267,7 @@ struct TorchImporter : public ::cv::dnn::Importer storages.insert(std::make_pair(index, storageMat)); } - void readTorchTable(Dict &scalarParams, std::map &tensorParams) + void readTorchTable(Dict &scalarParams, TensorsMap &tensorParams) { int luaType = readInt(); int index = readInt(); @@ -309,7 +305,7 @@ struct TorchImporter : public ::cv::dnn::Importer if (tensors.count(index)) //tensor was readed { - tensorParams.insert(std::make_pair(key, tensors[index])); + tensorParams.insert(std::make_pair(key, std::make_pair(index, tensors[index]))); } else if (storages.count(index)) //storage was readed { @@ -347,9 +343,9 @@ struct TorchImporter : public ::cv::dnn::Importer std::cout << scalarParams; std::cout << "#" << tensorParams.size() << " tensorParams:\n"; - std::map::const_iterator it; + std::map >::const_iterator it; for (it = tensorParams.begin(); it != tensorParams.end(); it++) - std::cout << it->first << ": Tensor " << it->second.shape() << "\n"; + std::cout << it->first << ": Tensor " << it->second.second.shape() << "\n"; } } @@ -375,9 +371,11 @@ struct TorchImporter : public ::cv::dnn::Importer int indexStorage = readInt(); if (readedIndexes.count(indexStorage) == 0) { - int typeStorage = 
parseStorageType(readTorchClassName()); + String className = readTorchClassName(); + int typeStorage = parseStorageType(className); CV_Assert(typeStorage >= 0 && typeTensor == typeStorage); readTorchStorage(indexStorage, typeStorage); + readedIndexes.insert(indexStorage); } //small check @@ -396,8 +394,7 @@ struct TorchImporter : public ::cv::dnn::Importer } //allocate Blob - Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset, (size_t*)ssteps); - //int dstType = (typeTensor == CV_64F) ? CV_64F : CV_32F; + Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps); int dstType = CV_32F; Blob blob; @@ -436,12 +433,7 @@ struct TorchImporter : public ::cv::dnn::Importer void readTorchObject(int index) { if(readedIndexes.count(index)) - { - if(!storages.count(index) && !tensors.count(index)) - CV_Error(Error::StsNotImplemented, "Objects which have multiple references are not supported"); - else - return; - } + return; String className = readTorchClassName(); String nnName; @@ -461,12 +453,15 @@ struct TorchImporter : public ::cv::dnn::Importer else if (isNNClass(className, nnName)) { Dict scalarParams; - std::map tensorParams; + TensorsMap tensorParams; - Module *newModule = new Module(nnName); + cv::Ptr newModule(new Module(nnName)); cv::dnn::LayerParams &layerParams = newModule->params; - if (nnName == "Sequential" || nnName == "Parallel" || nnName == "Concat") + layerParams.set("torch_index", index); + + if (nnName == "Sequential" || nnName == "Parallel" || + nnName == "Concat" || nnName == "ConcatTable" || nnName == "JoinTable") { Module *parentModule = curModule; curModule->modules.push_back(newModule); @@ -483,6 +478,10 @@ struct TorchImporter : public ::cv::dnn::Importer { layerParams.set("dimension", scalarParams.get("dimension")); } + if (nnName == "JoinTable") + { + layerParams.set("dimension", scalarParams.get("dimension")); + } } else if (nnName == "SpatialConvolution") { @@ -490,12 +489,12 @@ struct TorchImporter : public ::cv::dnn::Importer readTorchTable(scalarParams, tensorParams); CV_Assert(tensorParams.count("weight")); - layerParams.blobs.push_back(tensorParams["weight"]); + layerParams.blobs.push_back(tensorParams["weight"].second); bool bias = tensorParams.count("bias") != 0; layerParams.set("bias_term", bias); if (bias) - layerParams.blobs.push_back(tensorParams["bias"]); + layerParams.blobs.push_back(tensorParams["bias"].second); layerParams.set("num_output", scalarParams.get("nOutputPlane")); convertTorchKernelsParams(scalarParams, layerParams); @@ -507,8 +506,10 @@ struct TorchImporter : public ::cv::dnn::Importer newModule->apiType = "Pooling"; readTorchTable(scalarParams, tensorParams); - if (nnName == "SpatialMaxPooling") + if (nnName == "SpatialMaxPooling") { layerParams.set("pool", "MAX"); + layerParams.set("indices_blob_id", tensorParams["indices"].first); + } if (nnName == "SpatialAveragePooling") layerParams.set("pool", "AVE"); convertTorchKernelsParams(scalarParams, layerParams); @@ -521,12 +522,12 @@ struct TorchImporter : public ::cv::dnn::Importer readTorchTable(scalarParams, tensorParams); CV_Assert(tensorParams.count("weight")); - Blob weightBlob = tensorParams["weight"]; + Blob weightBlob = tensorParams["weight"].second; layerParams.blobs.push_back(weightBlob); bool bias = tensorParams.count("bias") != 0; if (bias) - layerParams.blobs.push_back(tensorParams["bias"]); + layerParams.blobs.push_back(tensorParams["bias"].second); layerParams.set("bias_term", 
bias); layerParams.set("num_output", weightBlob.size(0)); @@ -549,24 +550,205 @@ struct TorchImporter : public ::cv::dnn::Importer } else if (nnName == "ReLU") { - curModule->modules.push_back(new Module(nnName, "ReLU")); + curModule->modules.push_back(cv::Ptr(new Module(nnName, "ReLU"))); readObject(); } else if (nnName == "Tanh") { - curModule->modules.push_back(new Module(nnName, "TanH")); + curModule->modules.push_back(cv::Ptr(new Module(nnName, "TanH"))); readObject(); } else if (nnName == "Sigmoid") { - curModule->modules.push_back(new Module(nnName, "Sigmoid")); + curModule->modules.push_back(cv::Ptr(new Module(nnName, "Sigmoid"))); + readObject(); + } + else if (nnName == "SpatialBatchNormalization") + { + newModule->apiType = "BatchNorm"; + readTorchTable(scalarParams, tensorParams); + + CV_Assert(tensorParams.count("running_var") && + tensorParams.count("running_mean")); + layerParams.blobs.push_back(tensorParams["running_mean"].second); + layerParams.blobs.push_back(tensorParams["running_var"].second); + + CV_Assert(scalarParams.has("eps")); + layerParams.set("eps", float(scalarParams.get("eps"))); + + layerParams.blobs.push_back(Blob()); + layerParams.blobs.push_back(Blob()); + + if (tensorParams.count("weight")) + { + layerParams.set("has_weight", true); + layerParams.blobs[2] = tensorParams["weight"].second; + } + + if (tensorParams.count("bias")) + { + layerParams.set("has_bias", true); + layerParams.blobs[3] = tensorParams["bias"].second; + } + + curModule->modules.push_back(newModule); + } + else if (nnName == "PReLU") + { + readTorchTable(scalarParams, tensorParams); + + CV_Assert(tensorParams.count("weight")); + + size_t outputChannels = static_cast(scalarParams.get("nOutputPlane")); + if (outputChannels) { + + CV_Assert(tensorParams["weight"].second.total() == outputChannels); + layerParams.blobs.push_back(tensorParams["weight"].second); + + newModule->apiType = "ChannelsPReLU"; + } + else { + CV_Assert(tensorParams["weight"].second.total() == 1); + float negative_slope = *tensorParams["weight"].second.ptrf(); + layerParams.set("negative_slope", negative_slope); + + newModule->apiType = "ReLU"; + } + + curModule->modules.push_back(newModule); + } + else if (nnName == "SpatialDropout") + { + readTorchTable(scalarParams, tensorParams); + CV_Assert(scalarParams.has("p")); + + float scale = 1 - scalarParams.get("p"); + + CV_Assert(scale > 0); + + newModule->apiType = "Power"; + layerParams.set("scale", scale); + curModule->modules.push_back(newModule); + } + else if (nnName == "Identity") + { + readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Identity"; + curModule->modules.push_back(newModule); + } + else if (nnName == "Padding") + { + readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Padding"; + + CV_Assert(scalarParams.has("pad") && + scalarParams.has("dim")); + + layerParams.set("padding_dim", + static_cast(scalarParams.get("dim") - 1)); + layerParams.set("padding", static_cast(scalarParams.get("pad"))); + + if (scalarParams.has("nInputDim")) + layerParams.set("input_dims", + static_cast(scalarParams.get("nInputDim"))); + + if (scalarParams.has("value")) + layerParams.set("value", scalarParams.get("value")); + + if (scalarParams.has("index")) + layerParams.set("index", + static_cast(scalarParams.get("index") - 1)); + + curModule->modules.push_back(newModule); + } + else if (nnName == "CAddTable") + { + curModule->modules.push_back(newModule); readObject(); } + else if (nnName == "SpatialDilatedConvolution") + { + 
readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Convolution"; + CV_Assert(scalarParams.has("padW") && + scalarParams.has("padH")&& + scalarParams.has("dW")&& + scalarParams.has("dH")&& + scalarParams.has("dilationW")&& + scalarParams.has("dilationH")&& + scalarParams.has("kW")&& + scalarParams.has("kH")&& + scalarParams.has("nOutputPlane")); + + layerParams.set("kernel_w", static_cast(scalarParams.get("kW"))); + layerParams.set("kernel_h", static_cast(scalarParams.get("kH"))); + layerParams.set("pad_w", static_cast(scalarParams.get("padW"))); + layerParams.set("pad_h", static_cast(scalarParams.get("padH"))); + layerParams.set("stride_w", static_cast(scalarParams.get("dW"))); + layerParams.set("stride_h", static_cast(scalarParams.get("dH"))); + layerParams.set("dilation_w", static_cast(scalarParams.get("dilationW"))); + layerParams.set("dilation_h", static_cast(scalarParams.get("dilationH"))); + layerParams.set("num_output", static_cast(scalarParams.get("nOutputPlane"))); + + layerParams.blobs.push_back(tensorParams["weight"].second); + + bool bias = tensorParams.count("bias"); + layerParams.set("bias_term", bias); + if (bias) + layerParams.blobs.push_back(tensorParams["bias"].second); + + curModule->modules.push_back(newModule); + } + else if (nnName == "SpatialFullConvolution") + { + readTorchTable(scalarParams, tensorParams); + newModule->apiType = "Deconvolution"; + CV_Assert(scalarParams.has("padW") && + scalarParams.has("padH")&& + scalarParams.has("dW")&& + scalarParams.has("dH")&& + scalarParams.has("adjW")&& + scalarParams.has("adjH")&& + scalarParams.has("kW")&& + scalarParams.has("kH")&& + scalarParams.has("nOutputPlane")); + + layerParams.set("kernel_w", static_cast(scalarParams.get("kW"))); + layerParams.set("kernel_h", static_cast(scalarParams.get("kH"))); + layerParams.set("pad_w", static_cast(scalarParams.get("padW"))); + layerParams.set("pad_h", static_cast(scalarParams.get("padH"))); + layerParams.set("stride_w", static_cast(scalarParams.get("dW"))); + layerParams.set("stride_h", static_cast(scalarParams.get("dH"))); + layerParams.set("adj_w", static_cast(scalarParams.get("adjW"))); + layerParams.set("adj_h", static_cast(scalarParams.get("adjH"))); + layerParams.set("num_output", static_cast(scalarParams.get("nOutputPlane"))); + + layerParams.blobs.push_back(tensorParams["weight"].second); + + bool bias = tensorParams.count("bias"); + layerParams.set("bias_term", bias); + if (bias) + layerParams.blobs.push_back(tensorParams["bias"].second); + + curModule->modules.push_back(newModule); + } + else if (nnName == "SpatialMaxUnpooling") + { + readTorchTable(scalarParams, tensorParams); + + CV_Assert(scalarParams.has("oheight") && + scalarParams.has("owidth")); + + CV_Assert(tensorParams.count("indices")); + + layerParams.set("out_h", static_cast(scalarParams.get("oheight"))); + layerParams.set("out_w", static_cast(scalarParams.get("owidth"))/2); + layerParams.set("indices_blob_id", tensorParams["indices"].first); + curModule->modules.push_back(newModule); + } else { - delete newModule; CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\""); - readObject(); } } else @@ -606,15 +788,16 @@ struct TorchImporter : public ::cv::dnn::Importer return "l" + toString(++this->moduleCounter) + "_" + label; } - int fill(Module *module, int prevLayerId = 0, int prevOutNum = 0) + int fill(Module *module, std::vector >& addedModules, int prevLayerId = 0, int prevOutNum = 0) { if (module == NULL) return prevLayerId; if (module->apiType.length()) { - int 
newLayerId = this->net.addLayer(generateLayerName(module->apiType), module->apiType, module->params); + int newLayerId = net.addLayer(generateLayerName(module->apiType), module->apiType, module->params); net.connect(prevLayerId, prevOutNum, newLayerId, 0); + addedModules.push_back(std::make_pair(newLayerId, module)); return newLayerId; } else @@ -623,7 +806,7 @@ struct TorchImporter : public ::cv::dnn::Importer { for (size_t i = 0; i < module->modules.size(); i++) { - prevLayerId = fill(module->modules[i], prevLayerId, prevOutNum); + prevLayerId = fill(module->modules[i], addedModules, prevLayerId, prevOutNum); prevOutNum = 0; } return prevLayerId; @@ -640,10 +823,11 @@ struct TorchImporter : public ::cv::dnn::Importer for (int i = 0; i < (int)module->modules.size(); i++) { - newId = fill(module->modules[i], splitId, i); + newId = fill(module->modules[i], addedModules, splitId, i); net.connect(newId, 0, mergeId, i); } + addedModules.push_back(std::make_pair(mergeId, module)); return mergeId; } else if (module->thName == "Parallel") @@ -664,19 +848,92 @@ struct TorchImporter : public ::cv::dnn::Importer for (int i = 0; i < (int)module->modules.size(); i++) { net.connect(splitId, i, reshapeId, i); - newId = fill(module->modules[i], reshapeId, i); + newId = fill(module->modules[i], addedModules, reshapeId, i); net.connect(newId, 0, mergeId, i); } + addedModules.push_back(std::make_pair(mergeId, module)); + return mergeId; + } + else if (module->thName == "ConcatTable") { + int newId, splitId; + LayerParams splitParams; + + splitId = net.addLayer(generateLayerName("torchSplit"), "Split", splitParams); + net.connect(prevLayerId, prevOutNum, splitId, 0); + + addedModules.push_back(std::make_pair(splitId, module)); + + for (int i = 0; i < (int)module->modules.size(); i++) + { + newId = fill(module->modules[i], addedModules, splitId, i); + } + + return newId; + } + else if (module->thName == "JoinTable") { + std::vector ids = net.getUnconnectedOutLayers(); + + int mergeId; + LayerParams mergeParams; + mergeParams.set("axis", module->params.get("dimension") - 1); + + mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams); + addedModules.push_back(std::make_pair(mergeId, module)); + + for (int i = 0; i < ids.size(); i++) + { + net.connect(ids[i], 0, mergeId, i); + } + return mergeId; } + else if (module->thName == "CAddTable") { + String name = generateLayerName("torchCAddTable"); + std::vector ids = net.getUnconnectedOutLayers(); + LayerParams params; + params.set("operation", "sum"); + + + int id = net.addLayer(name, "Eltwise", params); + + for (int i = 0; i < ids.size(); i++) + { + net.connect(ids[i], 0, id, i); + } + + addedModules.push_back(std::make_pair(id, module)); + return id; + } + else if (module->thName == "SpatialMaxUnpooling") { + String name = generateLayerName("torchMaxUnpooling"); + int id = net.addLayer(name, "MaxUnpool", module->params); + net.connect(prevLayerId, 0, id, 0); + + CV_Assert(module->params.has("indices_blob_id")); + + int indicesBlobId = module->params.get("indices_blob_id"); + + for(int i = 0; i < addedModules.size(); i++) + { + if (addedModules[i].second->apiType == "Pooling" && + addedModules[i].second->params.has("indices_blob_id") && + addedModules[i].second->params.get("indices_blob_id") == indicesBlobId) + { + net.connect(addedModules[i].first, 1, id, 1); + break; + } + } + + return id; + } } CV_Error(Error::StsInternal, "Unexpected torch container: " + module->thName); return -1; } - void populateNet(Net net) + void populateNet(Net 
net_) { if (rootModule == NULL) { @@ -687,8 +944,9 @@ struct TorchImporter : public ::cv::dnn::Importer readObject(); } - this->net = net; - fill(rootModule); + net = net_; + std::vector > addedModules; + fill(rootModule, addedModules); } }; @@ -707,20 +965,5 @@ Blob readTorchBlob(const String &filename, bool isBinary) return importer->tensors.begin()->second; } -#else //ENABLE_TORCH_IMPORTER - -Ptr createTorchImporter(const String&, bool) -{ - CV_Error(Error::StsNotImplemented, "Module was build without Torch importer"); - return Ptr(); -} - -Blob readTorchBlob(const String&, bool) -{ - CV_Error(Error::StsNotImplemented, "Module was build without Torch importer"); - return Blob(); -} - -#endif //ENABLE_TORCH_IMPORTER } } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 6680de31c..9a5660b19 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -154,6 +154,7 @@ TEST(Layer_Test_DeConvolution, Accuracy) { OCL_OFF(testLayerUsingCaffeModels("layer_deconvolution", true, false)); } + OCL_TEST(Layer_Test_DeConvolution, Accuracy) { OCL_ON(testLayerUsingCaffeModels("layer_deconvolution", true, false);); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 9f5d08637..8832766e1 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -38,13 +38,12 @@ TEST(Test_TensorFlow, read_inception) resize(sample, input, Size(224, 224)); input -= 128; // mean sub - std::vector inpMats; - inpMats.push_back(input); + dnn::Blob inputBlob = dnn::Blob::fromImages(input); - net.setBlob("_input.input", Blob(inpMats)); + net.setBlob("_input.input", inputBlob); net.forward(); - Blob out = net.getBlob("output"); + Blob out = net.getBlob("softmax2"); std::cout << out.dims() << std::endl; }
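
A minimal end-to-end sketch of how the pieces introduced by this patch fit together, condensed
from the torch_enet.cpp sample above (the file names, the ".0" input blob name and the output
blob name are placeholders taken from that sample, not fixed API values):

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>
    #include <opencv2/highgui.hpp>
    #include <iostream>
    using namespace cv;
    using namespace cv::dnn;

    int main()
    {
        // Torch importer (requires the <module>_BUILD_TORCH_IMPORTER option, now ON by default).
        Ptr<Importer> importer = createTorchImporter("model_best.net");
        Net net;
        importer->populateNet(net);
        importer.release();

        // ENet expects a 512x512 RGB image scaled to [0, 1].
        Mat img = imread("street.png");
        resize(img, img, Size(512, 512));
        cvtColor(img, img, COLOR_BGR2RGB);
        img.convertTo(img, CV_32F, 1 / 255.0);

        net.setBlob(".0", Blob::fromImages(img));
        net.forward();

        // New in this patch: list layers whose outputs are not consumed by other layers,
        // useful for locating the output of an imported network.
        std::vector<int> outLayers = net.getUnconnectedOutLayers();
        std::cout << "Unconnected output layers: " << outLayers.size() << std::endl;

        Blob prob = net.getBlob("output_blob_name"); // placeholder, pass via -o_blob in the sample
        imshow("channel 0", prob.getPlane(0, 0));
        waitKey();
        return 0;
    }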