diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 1e3d1fc7b..3db00b593 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -267,6 +267,14 @@ namespace dnn
         static Ptr<SoftmaxLayer> create(int axis = 1);
     };
 
+    class CV_EXPORTS_W InnerProductLayer : public Layer
+    {
+    public:
+        int axis;
+
+        static Ptr<InnerProductLayer> create(int axis = 1);
+    };
+
 //! @}
 //! @}
 
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp
index eac3eb8a5..e7e9ba3d2 100644
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -84,7 +84,7 @@ void initModule()
     REG_RUNTIME_LAYER_FUNC(Pooling, createPoolingLayerFromCaffe)
     REG_RUNTIME_LAYER_CLASS(MVN, MVNLayer)
     REG_RUNTIME_LAYER_FUNC(LRN, createLRNLayerFromCaffe)
-    REG_RUNTIME_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
+    REG_RUNTIME_LAYER_FUNC(InnerProduct, createInnerProductLayerFromCaffe)
     REG_RUNTIME_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
     REG_RUNTIME_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
 
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 80b91be5d..3e6668016 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -43,73 +43,110 @@
 #include "layers_common.hpp"
 #include "fully_connected_layer.hpp"
 #include "op_blas.hpp"
+#include <opencv2/dnn/shape_utils.hpp>
+#include <opencv2/core/ocl.hpp>
 
 namespace cv
 {
 namespace dnn
 {
-    FullyConnectedLayer::FullyConnectedLayer(LayerParams &params) : Layer(params)
-    {
-        numOutputs = params.get<int>("num_output");
-        bias = params.get<bool>("bias_term", true);
-        axis_ = params.get<int>("axis", 1);
-        CV_Assert(blobs.size() == (bias ? 2U : 1U));
-        CV_Assert(blobs[0].dims() >= 2 && blobs[0].total() >= (size_t)numOutputs);
-        CV_Assert(!bias || blobs[1].total() == (size_t)numOutputs);
-    }
+FullyConnectedLayerImpl::FullyConnectedLayerImpl(int axis_)
+{
+    axis = axis_;
+}
 
-    void FullyConnectedLayer::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
-    {
-        CV_Assert(input.size() > 0);
+void FullyConnectedLayerImpl::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
+{
+    CV_Assert(input.size() > 0);
+    CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
+    CV_Assert(blobs[0].dims() == 2);
 
-        axis = input[0]->canonicalAxis(axis_);
-        innerSize = (int)input[0]->total(axis);
+    bias = (blobs.size() >= 2);
+    axisCan = input[0]->canonicalAxis(axis);
+    dtype = input[0]->type();
+    numOutput = blobs[0].size(0);
+    innerSize = blobs[0].size(1);
+    outerSize = input[0]->total(0, axisCan);
 
-        CV_Assert((size_t)innerSize * (size_t)numOutputs == blobs[0].total());
-        CV_Assert(blobs[0].size(-2) == numOutputs && blobs[0].size(-1) == innerSize);
+    CV_Assert((size_t)innerSize == input[0]->total(axisCan));
+    CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
 
-        output.resize(input.size());
-        for (size_t i = 0; i < input.size(); i++)
-        {
-            if (i != 0)
-                CV_Assert(input[i]->equalShape(*input[0]));
+    useOpenCL = ocl::useOpenCL();
+    int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
-            this->reshape(*input[i], output[i]);
-        }
-    }
+    biasOnesBlob.create(Shape(outerSize, 1), dtype, allocFlags);
+    if (useOpenCL)
+        biasOnesBlob.getRef<UMat>().setTo(1);
+    else
+        biasOnesBlob.getRef<Mat>().setTo(1);
 
-    void FullyConnectedLayer::reshape(const Blob &inp, Blob &out)
+    output.resize(input.size());
+    for (size_t i = 0; i < input.size(); i++)
     {
-        BlobShape inpShape = inp.shape();
-        BlobShape outShape(axis+1, inpShape.ptr());
-        outShape[axis] = numOutputs;
+        CV_Assert(i == 0 || (input[i]->equalShape(*input[0]) && input[i]->type() == dtype));
+        Shape outShape = input[i]->shape().slice(0, axis) + Shape(numOutput);
+        output[i].create(outShape, dtype, allocFlags);
+    }
+}
 
-        out.create(outShape, inp.type());
+void FullyConnectedLayerImpl::forward(std::vector<Blob*> &input, std::vector<Blob> &output)
+{
+    if (!useOpenCL)
+        forward_<Mat>(input, output);
+    else
+        forward_<UMat>(input, output);
+}
+
+template<typename XMat>
+void FullyConnectedLayerImpl::forward_(std::vector<Blob *> &input, std::vector<Blob> &output)
+{
+    const XMat &weight = blobs[0].getRefConst<XMat>();
+    const XMat *biasMat, *biasOnesMat;
+    if (bias)
+    {
+        biasOnesMat = &biasOnesBlob.getRefConst<XMat>();
+        biasMat = &blobs[1].getRefConst<XMat>();
     }
 
-    void FullyConnectedLayer::forward(std::vector<Blob*> &input, std::vector<Blob> &output)
+    for (size_t i = 0; i < input.size(); i++)
     {
-        for (size_t i = 0; i < input.size(); i++)
-        {
-            int M = (int)input[i]->total(0, axis);
-            int N = numOutputs;
-            int K = innerSize;
-
-            Mat srcMat(M, K, input[i]->type(), input[i]->ptrf());
-            Mat weight(N, K, blobs[0].type(), blobs[0].ptrf());
-            Mat dstMat(M, N, output[i].type(), output[i].ptrf());
-
-            //important: for perfomance purposes Caffe stores weights as transposed array
-            gemmCPU(srcMat, weight, 1, dstMat, 0, GEMM_2_T);
-
-            if (bias)
-            {
-                Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
-                Mat biasMat(1, N, CV_32F, blobs[1].ptrf());
-                gemmCPU(biasOnesMat, biasMat, 1, dstMat, 1);
-            }
-        }
+        const XMat srcMat = reshaped(input[i]->getRefConst<XMat>(), Shape(outerSize, innerSize));
+        XMat dstMat = reshaped(output[i].getRef<XMat>(), Shape(outerSize, numOutput));
+        dnn::gemm(srcMat, weight, 1, dstMat, 0, GEMM_2_T);
+
+        if (bias)
+            dnn::gemm(*biasOnesMat, *biasMat, 1, dstMat, 1);
     }
 }
+
+
+Ptr<InnerProductLayer> InnerProductLayer::create(int axis)
+{
+    return Ptr<InnerProductLayer>(new FullyConnectedLayerImpl(axis));
+}
+
+Ptr<Layer> createInnerProductLayerFromCaffe(LayerParams &params)
+{
+    const std::vector<Blob> &blobs = params.blobs;
+    CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
+
+    int numOutputs = params.get<int>("num_output");
+    int innerSize = (int)blobs[0].total() / numOutputs;
+    bool bias = params.get<bool>("bias_term", true);
+    int axis = params.get<int>("axis", 1);
+
+    CV_Assert(blobs[0].dims() >= 2 && (size_t)(innerSize * numOutputs) == blobs[0].total());
+    CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutputs == blobs[1].total()));
+
+    Ptr<InnerProductLayer> l = InnerProductLayer::create(axis);
+    l->setParamsFrom(params);
+    l->blobs[0].reshape(Shape(numOutputs, innerSize));
+    if (bias)
+        l->blobs[1].reshape(Shape(1, numOutputs));
+
+    return Ptr<Layer>(l);
+}
+
+}
 }
diff --git a/modules/dnn/src/layers/fully_connected_layer.hpp b/modules/dnn/src/layers/fully_connected_layer.hpp
index 5213b98d8..714593e4c 100644
--- a/modules/dnn/src/layers/fully_connected_layer.hpp
+++ b/modules/dnn/src/layers/fully_connected_layer.hpp
@@ -42,26 +42,32 @@
 #ifndef __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
 
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>
 
 namespace cv
 {
 namespace dnn
 {
-    class FullyConnectedLayer : public Layer
-    {
-        bool bias;
-        int numOutputs;
-        int axis_, axis;
-        int innerSize;
+class FullyConnectedLayerImpl : public InnerProductLayer
+{
+    int axisCan, dtype;
+    int numOutput, innerSize, outerSize;
+    bool bias, useOpenCL;
+    Blob biasOnesBlob;
+
+    template<typename XMat>
+    void forward_(std::vector<Blob*> &input, std::vector<Blob> &output);
+
+public:
+
+    FullyConnectedLayerImpl(int axisCan = 1);
+    void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output);
+    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+};
 
-        void reshape(const Blob &inp, Blob &out);
+Ptr<Layer> createInnerProductLayerFromCaffe(LayerParams &params);
 
-    public:
-        FullyConnectedLayer(LayerParams &params);
-        void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output);
-        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    };
 }
 }
 #endif
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 94d8945cf..009a32c57 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -181,7 +181,7 @@ TEST(Layer_Test_Reshape, squeeze)
     rl->allocate(inpVec, outVec);
     rl->forward(inpVec, outVec);
 
-    EXPECT_EQ(outVec[0].shape(), BlobShape(Vec3i(4, 3, 2)));
+    EXPECT_EQ(outVec[0].shape(), BlobShape(4, 3, 2));
 }
 
 TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
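
Usage sketch (illustration only, not part of the patch): how the refactored layer could be driven directly through the new InnerProductLayer::create factory. It assumes a CPU-only run (no OpenCL), the Blob::create/getRef calls used elsewhere in this patch, and the variadic BlobShape constructor exercised by the updated test; the shapes, fill values, and function name are invented for the example.

    #include <opencv2/dnn/all_layers.hpp>
    using namespace cv;
    using namespace cv::dnn;

    void innerProductSketch()
    {
        // Weights are stored Caffe-style as (numOutput x innerSize), bias as (1 x numOutput).
        Ptr<InnerProductLayer> fc = InnerProductLayer::create(/*axis=*/1);
        fc->blobs.resize(2);
        fc->blobs[0].create(BlobShape(3, 5), CV_32F, Blob::ALLOC_MAT);
        fc->blobs[0].getRef<Mat>().setTo(0.5);
        fc->blobs[1].create(BlobShape(1, 3), CV_32F, Blob::ALLOC_MAT);
        fc->blobs[1].getRef<Mat>().setTo(0.1);

        // Input: 2 samples x 5 features, all ones.
        Blob input;
        input.create(BlobShape(2, 5), CV_32F, Blob::ALLOC_MAT);
        input.getRef<Mat>().setTo(1.0);

        std::vector<Blob*> inputs(1, &input);
        std::vector<Blob> outputs;
        fc->allocate(inputs, outputs);   // outputs[0] should get shape (2, 3)
        fc->forward(inputs, outputs);    // each element should be 5*0.5 + 0.1 = 2.6
    }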