Layer implementations split into separate files.

10 years ago · eba62d5068
parent 2638433849
commit eba62d5068
8 changed files with 534 additions and 499 deletions
--- a/modules/dnn/src/layers.cpp
+++ b/modules/dnn/src/layers.cpp
@ -1,391 +0,0 @@
 #include "precomp.hpp"
 #include "layers.hpp"
 #include <math.h>
 #include <float.h>
 #include <iostream>
 #include <algorithm>
 using std::max;
 using std::min;
 namespace cv
 {
 namespace dnn
 {
 // Rectified-linear functor with an optional leak: non-negative inputs pass
 // through unchanged, negative inputs are multiplied by `negative_slope`.
 struct ReLUFunctor
 {
    float negative_slope; // taken from the "negative_slope" parameter, 0 when absent

    ReLUFunctor(LayerParams &params)
        : negative_slope(params.has("negative_slope") ? params.get<float>("negative_slope") : 0.f)
    {
    }

    // Applies the activation to a single value.
    inline float operator()(float x)
    {
        if (x >= 0)
            return x;
        return negative_slope * x;
    }
 };
 // Hyperbolic-tangent functor; it has no configurable parameters.
 struct TanHFunctor
 {
    // The layer parameters are accepted only to match the functor interface.
    TanHFunctor(LayerParams & /*params*/) {}

    // Applies tanh to a single value.
    inline float operator()(float x)
    {
        const float activated = tanh(x);
        return activated;
    }
 };
 // Layer registrations: each REGISTER_LAYER_CLASS(type, class) pairs a layer type
 // name with the class implementing it. The macro is defined elsewhere; presumably
 // it adds the class to the layer factory - confirm against its definition.
 REGISTER_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
 REGISTER_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
 REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer)
 REGISTER_LAYER_CLASS(Pooling, PoolingLayer)
 REGISTER_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
 //////////////////////////////////////////////////////////////////////////
 // Reads the kernel, padding and stride settings shared by convolution and
 // pooling layers.
 //
 // Kernel: "kernel_h"/"kernel_w" when both are present, otherwise "kernel_size";
 //         raises StsBadArg when neither form is given.
 // Padding: "pad_h"/"pad_w" when both are present, otherwise "pad" (default 0).
 // Stride: "stride_h"/"stride_w" when both are present, otherwise "stride" (default 1).
 // NOTE: a per-axis key given without its counterpart (e.g. only "pad_h") is ignored.
 //
 // Asserts that kernel and stride are positive and padding is non-negative.
 static void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW)
 {
    if (params.has("kernel_h") && params.has("kernel_w"))
    {
        kernelH = params.get<int>("kernel_h");
        kernelW = params.get<int>("kernel_w");
    }
    else if (params.has("kernel_size"))
    {
        kernelH = kernelW = params.get<int>("kernel_size");
    }
    else
    {
        CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
    }

    if (params.has("pad_h") && params.has("pad_w"))
    {
        padH = params.get<int>("pad_h");
        padW = params.get<int>("pad_w");
    }
    else
    {
        padH = padW = params.get<int>("pad", 0);
    }

    if (params.has("stride_h") && params.has("stride_w"))
    {
        strideH = params.get<int>("stride_h");
        strideW = params.get<int>("stride_w");
    }
    else
    {
        strideH = strideW = params.get<int>("stride", 1);
    }

    // Every term is joined with logical &&, so the whole check short-circuits uniformly.
    CV_Assert(kernelH > 0 && kernelW > 0 && padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
 }
 // Builds a pooling layer from its parameters. The optional "pool" value
 // (compared case-insensitively) selects "max", "ave" or "stochastic"; any other
 // value raises StsBadArg. Kernel/pad/stride are read through getKernelParams.
 PoolingLayer::PoolingLayer(LayerParams &params)
 {
    // Max pooling is used when "pool" is not given.
    type = MAX;

    if (params.has("pool"))
    {
        const String method = params.get<String>("pool").toLowerCase();
        if (method == "stochastic")
        {
            type = STOCHASTIC;
        }
        else if (method == "ave")
        {
            type = AVE;
        }
        else if (method != "max")
        {
            CV_Error(cv::Error::StsBadArg, "Unknown pooling type \"" + method + "\"");
        }
    }

    getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
 }
 // Records the input spatial size, derives the pooled size and creates one
 // output blob per input with shape (num, channels, pooledH, pooledW).
 // All inputs must share the spatial size of the first one.
 void PoolingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
 {
    CV_Assert(inputs.size() > 0);

    // Height is the row count and width the column count, matching both the
    // per-input check below and the row-major indexing (h * inW + w) in maxPooling.
    inH = inputs[0]->rows();
    inW = inputs[0]->cols();
    computeOutputShape(inH, inW);

    outputs.resize(inputs.size());
    for (size_t i = 0; i < inputs.size(); i++)
    {
        CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW);
        outputs[i].create(inputs[i]->num(), inputs[i]->channels(), pooledH, pooledW);
    }
 }
 // Pools every input blob into the output blob at the same index.
 // Only max pooling is implemented; any other type raises StsNotImplemented.
 void PoolingLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
 {
    for (size_t blobIdx = 0; blobIdx < inputs.size(); ++blobIdx)
    {
        if (type == MAX)
        {
            maxPooling(*inputs[blobIdx], outputs[blobIdx]);
        }
        else
        {
            CV_Error(cv::Error::StsNotImplemented, "Not implemented");
        }
    }
 }
 // Max-pools each (sample, channel) plane of `input` into `output`.
 // Every window is clipped to the input area, so padding cells never contribute;
 // a window that ends up empty leaves -FLT_MAX in its output cell.
 void PoolingLayer::maxPooling(Blob &input, Blob &output)
 {
    CV_DbgAssert(output.rows() == pooledH && output.cols() == pooledW);

    for (int sample = 0; sample < input.num(); ++sample)
    {
        for (int plane = 0; plane < input.channels(); ++plane)
        {
            float *src = input.ptr<float>(sample, plane);
            float *dst = output.ptr<float>(sample, plane);

            for (int outY = 0; outY < pooledH; ++outY)
            {
                // Vertical extent of the window, clipped to [0, inH).
                const int rawTop = outY * strideH - padH;
                const int bottom = min(rawTop + kernelH, inH);
                const int top = max(rawTop, 0);

                for (int outX = 0; outX < pooledW; ++outX)
                {
                    // Horizontal extent of the window, clipped to [0, inW).
                    const int rawLeft = outX * strideW - padW;
                    const int right = min(rawLeft + kernelW, inW);
                    const int left = max(rawLeft, 0);

                    float best = -FLT_MAX;
                    for (int y = top; y < bottom; ++y)
                    {
                        for (int x = left; x < right; ++x)
                        {
                            const float candidate = src[y * inW + x];
                            if (candidate > best)
                                best = candidate;
                        }
                    }
                    dst[outY * pooledW + outX] = best;
                }
            }
        }
    }
 }
 // Computes pooledH/pooledW from the input size using the rounding-up rule noted
 // in the original code as Caffe's scheme: ceil((in + 2*pad - kernel) / stride) + 1.
 // With padding, the last window is dropped when it would start outside the
 // image plus its leading padding.
 void PoolingLayer::computeOutputShape(int inH, int inW)
 {
    const float spanH = static_cast<float>(inH + 2 * padH - kernelH);
    const float spanW = static_cast<float>(inW + 2 * padW - kernelW);
    pooledH = static_cast<int>(ceil(spanH / strideH)) + 1;
    pooledW = static_cast<int>(ceil(spanW / strideW)) + 1;

    if (padH == 0 && padW == 0)
        return;

    // Clip the last pooling window if it would begin past the padded image.
    if ((pooledH - 1) * strideH >= inH + padH)
        pooledH -= 1;
    if ((pooledW - 1) * strideW >= inW + padW)
        pooledW -= 1;

    CV_Assert((pooledH - 1) * strideH < inH + padH);
    CV_Assert((pooledW - 1) * strideW < inW + padW);
 }
 //////////////////////////////////////////////////////////////////////////
 // Builds a convolution layer from its parameters and learned blobs.
 // Reads kernel/pad/stride via getKernelParams, then "num_output" (required),
 // "bias_term" (default true) and "group" (default 1). Keeps the weight blob and,
 // when bias is enabled, the bias blob from params.learnedBlobs.
 ConvolutionLayer::ConvolutionLayer(LayerParams &params)
 {
    getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
    numOutput = params.get<int>("num_output");
    bias = params.get<bool>("bias_term", true);
    group = params.get<int>("group", 1);
    CV_Assert(numOutput % group == 0);
    // At least the weights must be supplied, plus the bias blob when bias is enabled.
    CV_Assert(params.learnedBlobs.size() >= 1 && (!bias || params.learnedBlobs.size() >= 2));
    // Copy only the blobs this layer uses: weights, and bias if present.
    learnedParams.assign(params.learnedBlobs.begin(), params.learnedBlobs.begin() + (bias ? 2 : 1));
    Blob &weightBlob = learnedParams[0];
    // The weight blob must match the configured kernel size and output count.
    CV_Assert(weightBlob.cols() == kernelW && weightBlob.rows() == kernelH && weightBlob.num() == numOutput);
    if (bias)
    {
        Blob &biasBlob = learnedParams[1];
        // One bias value per output channel.
        CV_Assert(biasBlob.total() == numOutput);
    }
 }
 // Records the input geometry, derives the output size, creates one output blob
 // per input with shape (num, numOutput, outH, outW) and allocates the im2col
 // buffer (plus a row of ones used to broadcast the bias).
 // All inputs must share the channel count and spatial size of the first one.
 void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
 {
    CV_Assert(inputs.size() > 0);

    Blob &firstInput = *inputs[0];
    Blob &weightBlob = learnedParams[0];

    inCn = firstInput.channels();
    CV_Assert(inCn % group == 0 && weightBlob.channels() == inCn);

    inH = firstInput.rows();
    inW = firstInput.cols();
    computeOutputShape(inH, inW);

    const size_t blobCount = inputs.size();
    outputs.resize(blobCount);
    for (size_t i = 0; i < blobCount; ++i)
    {
        CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW && inputs[i]->channels() == inCn);
        outputs[i].create(inputs[i]->num(), numOutput, outH, outW);
    }

    // One column per output position, one row per (channel, ky, kx) triple.
    colCn = kernelH * kernelW * inCn;
    const int outArea = outH * outW;
    imColsMat.create(colCn, outArea, CV_32F);

    if (bias)
        biasOnesMat = Mat::ones(1, outArea, CV_32F);
 }
 // Unrolls the kernel-sized patches of a (channels x height x width) image into
 // a matrix with channels*kernel_h*kernel_w rows and one column per output
 // position, stored row-major in data_col. Positions that fall into the padding
 // are written as 0.
 template <typename Dtype>
 void im2col_cpu(const Dtype* data_im, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w,
    const int stride_h, const int stride_w,
    Dtype* data_col)
 {
    const int out_rows = (height + 2 * pad_h - kernel_h) / stride_h + 1;
    const int out_cols = (width + 2 * pad_w - kernel_w) / stride_w + 1;
    const int col_rows = channels * kernel_h * kernel_w;

    // The destination is filled strictly in order, so a running pointer suffices.
    Dtype* dst = data_col;
    for (int r = 0; r < col_rows; ++r)
    {
        // Decompose the row index into (image channel, kernel y, kernel x).
        const int kx = r % kernel_w;
        const int ky = (r / kernel_w) % kernel_h;
        const int plane = r / kernel_h / kernel_w;

        for (int oy = 0; oy < out_rows; ++oy)
        {
            const int iy = oy * stride_h - pad_h + ky;
            const bool rowInside = (iy >= 0 && iy < height);

            for (int ox = 0; ox < out_cols; ++ox, ++dst)
            {
                const int ix = ox * stride_w - pad_w + kx;
                if (rowInside && ix >= 0 && ix < width)
                    *dst = data_im[(plane * height + iy) * width + ix];
                else
                    *dst = 0;
            }
        }
    }
 }
 // Convolves every sample of every input blob: the sample is unrolled with
 // im2col_cpu into imColsMat, multiplied by the weight matrix, and the bias (if
 // enabled) is added to every output position. Only group == 1 is supported.
 void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
 {
    CV_Assert(inputs.size() == outputs.size());

    float *colPtr = imColsMat.ptr<float>();
    float *weightsPtr = learnedParams[0].ptr<float>();
    float *biasPtr = (bias) ? learnedParams[1].ptr<float>() : NULL;

    CV_Assert(group == 1);

    // These headers wrap the learned data without copying and do not depend on
    // the sample being processed, so they are built once.
    Mat weightsMat(numOutput, colCn, CV_32F, weightsPtr);
    Mat biasMat;
    if (bias)
        biasMat = Mat(numOutput, 1, CV_32F, biasPtr);

    for (size_t ii = 0; ii < outputs.size(); ii++)
    {
        int num = inputs[ii]->num();
        for (int n = 0; n < num; n++)
        {
            float *srcImPtr = inputs[ii]->ptr<float>(n);
            float *dstImPtr = outputs[ii].ptr<float>(n);

            im2col_cpu(srcImPtr, inCn, inH, inW, kernelH, kernelW, padH, padW, strideH, strideW, colPtr);

            // dstIm = weights * columns
            Mat dstIm(numOutput, outH*outW, CV_32F, dstImPtr);
            cv::gemm(weightsMat, imColsMat, 1, noArray(), 0, dstIm);

            if (bias)
            {
                // dstIm += bias (column) * ones (row): adds bias[c] to every position of channel c.
                cv::gemm(biasMat, biasOnesMat, 1, dstIm, 1, dstIm);
            }
        }
    }
 }
 void ConvolutionLayer::computeOutputShape(int inH, int inW)
 {
    outH = (inH + 2 * padH - kernelH) / strideH + 1;
    outW = (inW + 2 * padW - kernelW) / strideW + 1;
 }
 //////////////////////////////////////////////////////////////////////////
 // Builds a fully connected layer: reads "num_output" (required) and "bias_term"
 // (default true), then keeps the weight blob and, when bias is enabled, the
 // bias blob, which must hold exactly numOutputs values.
 FullyConnectedLayer::FullyConnectedLayer(LayerParams &params)
 {
    numOutputs = params.get<int>("num_output");
    bias = params.get<bool>("bias_term", true);

    CV_Assert(params.learnedBlobs.size() >= 1);
    CV_Assert(!bias || (params.learnedBlobs.size() >= 2 && params.learnedBlobs[1].total() == numOutputs));

    // Keep weights only, or weights followed by bias.
    const int usedBlobs = bias ? 2 : 1;
    learnedParams.assign(params.learnedBlobs.begin(), params.learnedBlobs.begin() + usedBlobs);
 }
 // Records the per-sample input shape, checks that the weight blob holds
 // inSize * numOutputs values and creates one (num, numOutputs, 1, 1) output
 // per input. Every further input must match the shape of the first.
 void FullyConnectedLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
 {
    CV_Assert(inputs.size() > 0);

    Blob &firstInput = *inputs[0];
    inC = firstInput.channels();
    inH = firstInput.rows();
    inW = firstInput.cols();
    inSize = inC * inH * inW;
    CV_Assert(inSize * numOutputs == learnedParams[0].total());

    outputs.resize(inputs.size());

    // The first input defines the reference shape, so it needs no comparison.
    outputs[0].create(firstInput.num(), numOutputs, 1, 1);
    for (size_t i = 1; i < inputs.size(); ++i)
    {
        CV_Assert(inputs[i]->channels() == inC && inputs[i]->rows() == inH && inputs[i]->cols() == inW);
        outputs[i].create(inputs[i]->num(), numOutputs, 1, 1);
    }
 }
 // Computes dst = src * W^T (+ bias) for every input blob, where src is the
 // (num x inSize) flattened input and dst the (num x numOutputs) output.
 void FullyConnectedLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
 {
    CV_Assert(inputs.size() == outputs.size());

    const int N = numOutputs;
    const int K = static_cast<int>(inSize);

    // Caffe stores InnerProduct weights as N rows of K values (one row per
    // output), so the blob is wrapped as N x K and transposed inside gemm.
    // NOTE(review): relies on the importer keeping Caffe's layout - confirm.
    Mat weights(N, K, CV_32F, learnedParams[0].ptr<float>());

    for (size_t i = 0; i < inputs.size(); i++)
    {
        int M = inputs[i]->num();

        Mat srcMat(M, K, CV_32F, inputs[i]->ptr<float>());
        Mat dstMat(M, N, CV_32F, outputs[i].ptr<float>());

        cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);

        if (bias)
        {
            // dst += ones (column) * bias (row): adds the bias vector to every sample.
            Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
            Mat biasMat(1, N, CV_32F, learnedParams[1].ptr<float>());
            cv::gemm(biasOnesMat, biasMat, 1, dstMat, 1, dstMat);
        }
    }
 }
 }
 }
--- a/modules/dnn/src/layers.hpp
+++ b/modules/dnn/src/layers.hpp
@ -1,108 +0,0 @@
 #ifndef __OPENCV_DNN_LAYERS_HPP__
 #define __OPENCV_DNN_LAYERS_HPP__
 #include <opencv2/dnn.hpp>
 namespace cv
 {
 namespace dnn
 {
    // Layer that applies the scalar functor `Func` to every element, in place.
    // Func must be constructible from LayerParams& and callable as float(float).
    template<typename Func>
    class ElementWiseLayer : public Layer
    {
        Func func; // per-element operation

    public:
        ElementWiseLayer(LayerParams &_params) : func(_params) {}

        // Makes each output a copy of the corresponding input Blob object;
        // forward() relies on the two sharing one data pointer.
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
        {
            const size_t blobCount = inputs.size();
            outputs.resize(blobCount);
            for (size_t k = 0; k < blobCount; ++k)
                outputs[k] = *inputs[k];
        }

        // Applies func to all elements of every blob, overwriting them.
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
        {
            CV_Assert(inputs.size() == outputs.size());

            for (size_t i = 0; i < inputs.size(); ++i)
            {
                // The operation is in place: input and output must alias.
                CV_Assert(inputs[i]->ptr<float>() == outputs[i].ptr<float>());

                float *cur = outputs[i].ptr<float>();
                float *const stop = cur + outputs[i].total();
                for (; cur != stop; ++cur)
                    *cur = func(*cur);
            }
        }
    };
    // Spatial pooling layer configured by a pooling type plus kernel, padding
    // and stride sizes.
    class PoolingLayer : public Layer
    {
        // Values stored in `type`.
        enum
        {
            MAX,
            AVE,
            STOCHASTIC
        };

        int type;     // selected pooling type
        int padH;     // padding, vertical
        int padW;     // padding, horizontal
        int strideH;  // window step, vertical
        int strideW;  // window step, horizontal
        int kernelH;  // window height
        int kernelW;  // window width
        int inH;      // input height recorded by allocate()
        int inW;      // input width recorded by allocate()
        int pooledH;  // output height
        int pooledW;  // output width

        // Fills pooledH/pooledW for the given input size.
        void computeOutputShape(int inH, int inW);
        // Max-pools `input` into `output`.
        void maxPooling(Blob &input, Blob &output);

    public:
        PoolingLayer(LayerParams &params);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
    // 2D convolution layer computed as im2col followed by a matrix product.
    class ConvolutionLayer : public Layer
    {
        bool bias;        // whether a bias blob is used ("bias_term")
        int numOutput;    // number of output channels ("num_output")
        int group;        // "group" parameter
        int padH;         // padding, vertical
        int padW;         // padding, horizontal
        int strideH;      // kernel step, vertical
        int strideW;      // kernel step, horizontal
        int kernelH;      // kernel height
        int kernelW;      // kernel width
        int inH;          // input height recorded by allocate()
        int inW;          // input width recorded by allocate()
        int inCn;         // input channel count recorded by allocate()
        int colCn;        // rows of the im2col matrix: kernelH * kernelW * inCn
        int outH;         // output height
        int outW;         // output width
        Mat imColsMat;    // im2col buffer, colCn x (outH * outW)
        Mat biasOnesMat;  // 1 x (outH * outW) row of ones used to add the bias

        // Fills outH/outW for the given input size.
        void computeOutputShape(int inH, int inW);

    public:
        ConvolutionLayer(LayerParams &params);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
    // Fully connected layer: a matrix product over the flattened input plus an
    // optional bias.
    class FullyConnectedLayer : public Layer
    {
        bool bias;       // whether a bias blob is used ("bias_term")
        int numOutputs;  // number of outputs per sample ("num_output")
        int inC;         // input channels recorded by allocate()
        int inH;         // input height recorded by allocate()
        int inW;         // input width recorded by allocate()
        size_t inSize;   // flattened per-sample input size: inC * inH * inW

    public:
        FullyConnectedLayer(LayerParams &params);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
 }
 }
 #endif
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@ -0,0 +1,157 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 namespace cv
 {
 namespace dnn
 {
    //TODO: implement group parameter
    //TODO: perform convolution and bias addition simultaneously for cache optimization
    // 2D convolution layer computed as im2col followed by a matrix product.
    class ConvolutionLayer : public Layer
    {
        bool bias;        // whether a bias blob is used ("bias_term")
        int numOutput;    // number of output channels ("num_output")
        int group;        // "group" parameter; forward() only accepts 1
        int padH;         // padding, vertical
        int padW;         // padding, horizontal
        int strideH;      // kernel step, vertical
        int strideW;      // kernel step, horizontal
        int kernelH;      // kernel height
        int kernelW;      // kernel width
        int inH;          // input height recorded by allocate()
        int inW;          // input width recorded by allocate()
        int inCn;         // input channel count recorded by allocate()
        int colCn;        // rows of the im2col matrix: kernelH * kernelW * inCn
        int outH;         // output height
        int outW;         // output width
        Mat imColsMat;    // im2col buffer, colCn x (outH * outW)
        Mat biasOnesMat;  // 1 x (outH * outW) row of ones used to add the bias

        // Fills outH/outW for the given input size.
        void computeOutputShape(int inH, int inW);

    public:
        ConvolutionLayer(LayerParams &params);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
    REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer)
    // Builds a convolution layer from its parameters and learned blobs.
    // Reads kernel/pad/stride via getKernelParams, then "num_output" (required),
    // "bias_term" (default true) and "group" (default 1). Keeps the weight blob and,
    // when bias is enabled, the bias blob from params.learnedBlobs.
    ConvolutionLayer::ConvolutionLayer(LayerParams &params)
    {
        getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
        numOutput = params.get<int>("num_output");
        bias = params.get<bool>("bias_term", true);
        group = params.get<int>("group", 1);
        CV_Assert(numOutput % group == 0);
        // At least the weights must be supplied, plus the bias blob when bias is enabled.
        CV_Assert(params.learnedBlobs.size() >= 1 && (!bias || params.learnedBlobs.size() >= 2));
        // Copy only the blobs this layer uses: weights, and bias if present.
        learnedParams.assign(params.learnedBlobs.begin(), params.learnedBlobs.begin() + (bias ? 2 : 1));
        Blob &weightBlob = learnedParams[0];
        // The weight blob must match the configured kernel size and output count.
        CV_Assert(weightBlob.cols() == kernelW && weightBlob.rows() == kernelH && weightBlob.num() == numOutput);
        if (bias)
        {
            Blob &biasBlob = learnedParams[1];
            // One bias value per output channel.
            CV_Assert(biasBlob.total() == numOutput);
        }
    }
    // Records the input geometry, derives the output size, creates one output
    // blob per input with shape (num, numOutput, outH, outW) and allocates the
    // im2col buffer (plus a row of ones used to broadcast the bias).
    // All inputs must share the channel count and spatial size of the first one.
    void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(inputs.size() > 0);

        Blob &firstInput = *inputs[0];
        Blob &weightBlob = learnedParams[0];

        inCn = firstInput.channels();
        CV_Assert(inCn % group == 0 && weightBlob.channels() == inCn);

        inH = firstInput.rows();
        inW = firstInput.cols();
        computeOutputShape(inH, inW);

        const size_t blobCount = inputs.size();
        outputs.resize(blobCount);
        for (size_t i = 0; i < blobCount; ++i)
        {
            CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW && inputs[i]->channels() == inCn);
            outputs[i].create(inputs[i]->num(), numOutput, outH, outW);
        }

        // One column per output position, one row per (channel, ky, kx) triple.
        colCn = kernelH * kernelW * inCn;
        const int outArea = outH * outW;
        imColsMat.create(colCn, outArea, CV_32F);

        if (bias)
            biasOnesMat = Mat::ones(1, outArea, CV_32F);
    }
    // Unrolls the kernel-sized patches of a (channels x height x width) image
    // into a matrix with channels*kernel_h*kernel_w rows and one column per
    // output position, stored row-major in data_col. Positions that fall into
    // the padding are written as 0.
    template <typename Dtype>
    void im2col_cpu(const Dtype* data_im, const int channels,
        const int height, const int width, const int kernel_h, const int kernel_w,
        const int pad_h, const int pad_w,
        const int stride_h, const int stride_w,
        Dtype* data_col)
    {
        const int out_rows = (height + 2 * pad_h - kernel_h) / stride_h + 1;
        const int out_cols = (width + 2 * pad_w - kernel_w) / stride_w + 1;
        const int col_rows = channels * kernel_h * kernel_w;

        // The destination is filled strictly in order, so a running pointer suffices.
        Dtype* dst = data_col;
        for (int r = 0; r < col_rows; ++r)
        {
            // Decompose the row index into (image channel, kernel y, kernel x).
            const int kx = r % kernel_w;
            const int ky = (r / kernel_w) % kernel_h;
            const int plane = r / kernel_h / kernel_w;

            for (int oy = 0; oy < out_rows; ++oy)
            {
                const int iy = oy * stride_h - pad_h + ky;
                const bool rowInside = (iy >= 0 && iy < height);

                for (int ox = 0; ox < out_cols; ++ox, ++dst)
                {
                    const int ix = ox * stride_w - pad_w + kx;
                    if (rowInside && ix >= 0 && ix < width)
                        *dst = data_im[(plane * height + iy) * width + ix];
                    else
                        *dst = 0;
                }
            }
        }
    }
    // Convolves every sample of every input blob: the sample is unrolled with
    // im2col_cpu into imColsMat, multiplied by the weight matrix, and the bias
    // (if enabled) is added to every output position. Only group == 1 is supported.
    void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(inputs.size() == outputs.size());

        float *colPtr = imColsMat.ptr<float>();
        float *weightsPtr = learnedParams[0].ptr<float>();
        float *biasPtr = (bias) ? learnedParams[1].ptr<float>() : NULL;

        CV_Assert(group == 1);

        // These headers wrap the learned data without copying and do not depend
        // on the sample being processed, so they are built once.
        Mat weightsMat(numOutput, colCn, CV_32F, weightsPtr);
        Mat biasMat;
        if (bias)
            biasMat = Mat(numOutput, 1, CV_32F, biasPtr);

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            int num = inputs[ii]->num();
            for (int n = 0; n < num; n++)
            {
                float *srcImPtr = inputs[ii]->ptr<float>(n);
                float *dstImPtr = outputs[ii].ptr<float>(n);

                im2col_cpu(srcImPtr, inCn, inH, inW, kernelH, kernelW, padH, padW, strideH, strideW, colPtr);

                // dstIm = weights * columns
                Mat dstIm(numOutput, outH*outW, CV_32F, dstImPtr);
                cv::gemm(weightsMat, imColsMat, 1, noArray(), 0, dstIm);

                if (bias)
                {
                    // dstIm += bias (column) * ones (row): adds bias[c] to every position of channel c.
                    cv::gemm(biasMat, biasOnesMat, 1, dstIm, 1, dstIm);
                }
            }
        }
    }
    void ConvolutionLayer::computeOutputShape(int inH, int inW)
    {
        outH = (inH + 2 * padH - kernelH) / strideH + 1;
        outW = (inW + 2 * padW - kernelW) / strideW + 1;
    }
 }
 }
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@ -0,0 +1,74 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include <math.h>
 namespace cv
 {
 namespace dnn
 {
    // Layer that applies the scalar functor `Func` to every element, in place.
    // Func must be constructible from LayerParams& and callable as float(float).
    template<typename Func>
    class ElementWiseLayer : public Layer
    {
        Func func; // per-element operation

    public:
        ElementWiseLayer(LayerParams &_params) : func(_params) {}

        // Makes each output share the data of the corresponding input.
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
        {
            const size_t blobCount = inputs.size();
            outputs.resize(blobCount);
            for (size_t k = 0; k < blobCount; ++k)
                outputs[k] = *inputs[k]; // header assignment only, no data copy
        }

        // Applies func to all elements of every blob, overwriting them.
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
        {
            CV_Assert(inputs.size() == outputs.size());

            for (size_t i = 0; i < inputs.size(); ++i)
            {
                // The operation is in place: input and output must alias.
                CV_Assert(inputs[i]->ptr<float>() == outputs[i].ptr<float>());

                float *cur = outputs[i].ptr<float>();
                float *const stop = cur + outputs[i].total();
                for (; cur != stop; ++cur)
                    *cur = func(*cur);
            }
        }
    };
    // Rectified-linear functor with an optional leak: non-negative inputs pass
    // through unchanged, negative inputs are multiplied by `negative_slope`.
    struct ReLUFunctor
    {
        float negative_slope; // taken from the "negative_slope" parameter, 0 when absent

        ReLUFunctor(LayerParams &params)
            : negative_slope(params.has("negative_slope") ? params.get<float>("negative_slope") : 0.f)
        {
        }

        // Applies the activation to a single value.
        inline float operator()(float x)
        {
            if (x >= 0)
                return x;
            return negative_slope * x;
        }
    };
    // Hyperbolic-tangent functor; it has no configurable parameters.
    struct TanHFunctor
    {
        // The layer parameters are accepted only to match the functor interface.
        TanHFunctor(LayerParams & /*params*/) {}

        // Applies tanh to a single value.
        inline float operator()(float x)
        {
            const float activated = tanh(x);
            return activated;
        }
    };
    // Pair the "ReLU" and "TanH" layer type names with their element-wise
    // implementations. The macro is defined elsewhere; presumably it adds the
    // class to the layer factory - confirm against its definition.
    REGISTER_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
    REGISTER_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
 }
 }
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@ -0,0 +1,87 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 namespace cv
 {
 namespace dnn
 {
    //TODO: implement axis number parameter
    // Fully connected layer: a matrix product over the flattened input plus an
    // optional bias. Registered below under the type name "InnerProduct".
    class FullyConnectedLayer : public Layer
    {
        bool bias;       // whether a bias blob is used ("bias_term")
        int numOutputs;  // number of outputs per sample ("num_output")
        int inC;         // input channels recorded by allocate()
        int inH;         // input height recorded by allocate()
        int inW;         // input width recorded by allocate()
        size_t inSize;   // flattened per-sample input size: inC * inH * inW

    public:
        FullyConnectedLayer(LayerParams &params);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
    REGISTER_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
    // Builds a fully connected layer: reads "num_output" (required) and
    // "bias_term" (default true), then keeps the weight blob and, when bias is
    // enabled, the bias blob, which must hold exactly numOutputs values.
    FullyConnectedLayer::FullyConnectedLayer(LayerParams &params)
    {
        numOutputs = params.get<int>("num_output");
        bias = params.get<bool>("bias_term", true);

        CV_Assert(params.learnedBlobs.size() >= 1);
        CV_Assert(!bias || (params.learnedBlobs.size() >= 2 && params.learnedBlobs[1].total() == numOutputs));

        // Keep weights only, or weights followed by bias.
        const int usedBlobs = bias ? 2 : 1;
        learnedParams.assign(params.learnedBlobs.begin(), params.learnedBlobs.begin() + usedBlobs);
    }
    // Records the per-sample input shape, checks that the weight blob holds
    // inSize * numOutputs values and creates one (num, numOutputs, 1, 1) output
    // per input. Every further input must match the shape of the first.
    void FullyConnectedLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(inputs.size() > 0);

        Blob &firstInput = *inputs[0];
        inC = firstInput.channels();
        inH = firstInput.rows();
        inW = firstInput.cols();
        inSize = inC * inH * inW;
        CV_Assert(inSize * numOutputs == learnedParams[0].total());

        outputs.resize(inputs.size());

        // The first input defines the reference shape, so it needs no comparison.
        outputs[0].create(firstInput.num(), numOutputs, 1, 1);
        for (size_t i = 1; i < inputs.size(); ++i)
        {
            CV_Assert(inputs[i]->channels() == inC && inputs[i]->rows() == inH && inputs[i]->cols() == inW);
            outputs[i].create(inputs[i]->num(), numOutputs, 1, 1);
        }
    }
    // Computes dst = src * W^T (+ bias) for every input blob, where src is the
    // (num x inSize) flattened input and dst the (num x numOutputs) output.
    void FullyConnectedLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(inputs.size() == outputs.size());

        const int N = numOutputs;
        const int K = static_cast<int>(inSize);

        // Caffe stores InnerProduct weights as N rows of K values (one row per
        // output), so the blob is wrapped as N x K and transposed inside gemm.
        // NOTE(review): relies on the importer keeping Caffe's layout - confirm.
        Mat weights(N, K, CV_32F, learnedParams[0].ptr<float>());

        for (size_t i = 0; i < inputs.size(); i++)
        {
            int M = inputs[i]->num();

            Mat srcMat(M, K, CV_32F, inputs[i]->ptr<float>());
            Mat dstMat(M, N, CV_32F, outputs[i].ptr<float>());

            cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);

            if (bias)
            {
                // dst += ones (column) * bias (row): adds the bias vector to every sample.
                Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
                Mat biasMat(1, N, CV_32F, learnedParams[1].ptr<float>());
                cv::gemm(biasOnesMat, biasMat, 1, dstMat, 1, dstMat);
            }
        }
    }
 }
 }
--- a/modules/dnn/src/layers/layers_common.cpp
+++ b/modules/dnn/src/layers/layers_common.cpp
@ -0,0 +1,48 @@
 #include "layers_common.hpp"
 namespace cv
 {
 namespace dnn
 {
 // Reads the kernel, padding and stride settings shared by convolution and
 // pooling layers.
 //
 // Kernel: "kernel_h"/"kernel_w" when both are present, otherwise "kernel_size";
 //         raises StsBadArg when neither form is given.
 // Padding: "pad_h"/"pad_w" when both are present, otherwise "pad" (default 0).
 // Stride: "stride_h"/"stride_w" when both are present, otherwise "stride" (default 1).
 // NOTE: a per-axis key given without its counterpart (e.g. only "pad_h") is ignored.
 //
 // Asserts that kernel and stride are positive and padding is non-negative.
 void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW)
 {
    if (params.has("kernel_h") && params.has("kernel_w"))
    {
        kernelH = params.get<int>("kernel_h");
        kernelW = params.get<int>("kernel_w");
    }
    else if (params.has("kernel_size"))
    {
        kernelH = kernelW = params.get<int>("kernel_size");
    }
    else
    {
        CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
    }

    if (params.has("pad_h") && params.has("pad_w"))
    {
        padH = params.get<int>("pad_h");
        padW = params.get<int>("pad_w");
    }
    else
    {
        padH = padW = params.get<int>("pad", 0);
    }

    if (params.has("stride_h") && params.has("stride_w"))
    {
        strideH = params.get<int>("stride_h");
        strideW = params.get<int>("stride_w");
    }
    else
    {
        strideH = strideW = params.get<int>("stride", 1);
    }

    // Every term is joined with logical &&, so the whole check short-circuits uniformly.
    CV_Assert(kernelH > 0 && kernelW > 0 && padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
 }
 }
 }
--- a/modules/dnn/src/layers/layers_common.hpp
+++ b/modules/dnn/src/layers/layers_common.hpp
@ -0,0 +1,15 @@
 #ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
 #define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
 #include <opencv2/dnn.hpp>
 namespace cv
 {
 namespace dnn
 {
 // Reads the kernel, padding and stride sizes of a convolution/pooling layer
 // from `params` into the six output references. Accepted keys: "kernel_h" and
 // "kernel_w" or "kernel_size"; "pad_h" and "pad_w" or "pad" (default 0);
 // "stride_h" and "stride_w" or "stride" (default 1). Raises an error when no
 // kernel size is given.
 void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW);
 }
 }
 #endif
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@ -0,0 +1,153 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include <float.h>
 #include <algorithm>
 using std::max;
 namespace cv
 {
 namespace dnn
 {
    // Spatial pooling layer configured by a pooling type plus kernel, padding
    // and stride sizes. forward() currently implements max pooling only.
    class PoolingLayer : public Layer
    {
        // Values stored in `type`.
        enum
        {
            MAX,
            AVE,
            STOCHASTIC
        };

        int type;     // selected pooling type
        int padH;     // padding, vertical
        int padW;     // padding, horizontal
        int strideH;  // window step, vertical
        int strideW;  // window step, horizontal
        int kernelH;  // window height
        int kernelW;  // window width
        int inH;      // input height recorded by allocate()
        int inW;      // input width recorded by allocate()
        int pooledH;  // output height
        int pooledW;  // output width

        // Fills pooledH/pooledW for the given input size.
        void computeOutputShape(int inH, int inW);
        // Max-pools `input` into `output`.
        void maxPooling(Blob &input, Blob &output);

    public:
        PoolingLayer(LayerParams &params);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
    REGISTER_LAYER_CLASS(Pooling, PoolingLayer)
    // Builds a pooling layer from its parameters. The optional "pool" value
    // (compared case-insensitively) selects "max", "ave" or "stochastic"; any
    // other value raises StsBadArg. Kernel/pad/stride come from getKernelParams.
    PoolingLayer::PoolingLayer(LayerParams &params)
    {
        // Max pooling is used when "pool" is not given.
        type = MAX;

        if (params.has("pool"))
        {
            const String method = params.get<String>("pool").toLowerCase();
            if (method == "stochastic")
            {
                type = STOCHASTIC;
            }
            else if (method == "ave")
            {
                type = AVE;
            }
            else if (method != "max")
            {
                CV_Error(cv::Error::StsBadArg, "Unknown pooling type \"" + method + "\"");
            }
        }

        getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
    }
    // Records the input spatial size, derives the pooled size and creates one
    // output blob per input with shape (num, channels, pooledH, pooledW).
    // All inputs must share the spatial size of the first one.
    void PoolingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(inputs.size() > 0);

        // Height is the row count and width the column count, matching both the
        // per-input check below and the row-major indexing (h * inW + w) in maxPooling.
        inH = inputs[0]->rows();
        inW = inputs[0]->cols();
        computeOutputShape(inH, inW);

        outputs.resize(inputs.size());
        for (size_t i = 0; i < inputs.size(); i++)
        {
            CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW);
            outputs[i].create(inputs[i]->num(), inputs[i]->channels(), pooledH, pooledW);
        }
    }
    // Pools every input blob into the output blob at the same index.
    // Only max pooling is implemented; any other type raises StsNotImplemented.
    void PoolingLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        for (size_t blobIdx = 0; blobIdx < inputs.size(); ++blobIdx)
        {
            if (type == MAX)
            {
                maxPooling(*inputs[blobIdx], outputs[blobIdx]);
            }
            else
            {
                CV_Error(cv::Error::StsNotImplemented, "Not implemented");
            }
        }
    }
    // Max-pools each (sample, channel) plane of `input` into `output`.
    // Every window is clipped to the input area, so padding cells never
    // contribute; a window that ends up empty leaves -FLT_MAX in its output cell.
    void PoolingLayer::maxPooling(Blob &input, Blob &output)
    {
        CV_DbgAssert(output.rows() == pooledH && output.cols() == pooledW);

        for (int sample = 0; sample < input.num(); ++sample)
        {
            for (int plane = 0; plane < input.channels(); ++plane)
            {
                float *src = input.ptr<float>(sample, plane);
                float *dst = output.ptr<float>(sample, plane);

                for (int outY = 0; outY < pooledH; ++outY)
                {
                    // Vertical extent of the window, clipped to [0, inH).
                    const int rawTop = outY * strideH - padH;
                    const int bottom = min(rawTop + kernelH, inH);
                    const int top = max(rawTop, 0);

                    for (int outX = 0; outX < pooledW; ++outX)
                    {
                        // Horizontal extent of the window, clipped to [0, inW).
                        const int rawLeft = outX * strideW - padW;
                        const int right = min(rawLeft + kernelW, inW);
                        const int left = max(rawLeft, 0);

                        float best = -FLT_MAX;
                        for (int y = top; y < bottom; ++y)
                        {
                            for (int x = left; x < right; ++x)
                            {
                                const float candidate = src[y * inW + x];
                                if (candidate > best)
                                    best = candidate;
                            }
                        }
                        dst[outY * pooledW + outX] = best;
                    }
                }
            }
        }
    }
    // Computes pooledH/pooledW from the input size using the rounding-up rule
    // noted in the original code as Caffe's scheme:
    // ceil((in + 2*pad - kernel) / stride) + 1.
    // With padding, the last window is dropped when it would start outside the
    // image plus its leading padding.
    void PoolingLayer::computeOutputShape(int inH, int inW)
    {
        const float spanH = static_cast<float>(inH + 2 * padH - kernelH);
        const float spanW = static_cast<float>(inW + 2 * padW - kernelW);
        pooledH = static_cast<int>(ceil(spanH / strideH)) + 1;
        pooledW = static_cast<int>(ceil(spanW / strideW)) + 1;

        if (padH == 0 && padW == 0)
            return;

        // Clip the last pooling window if it would begin past the padded image.
        if ((pooledH - 1) * strideH >= inH + padH)
            pooledH -= 1;
        if ((pooledW - 1) * strideW >= inW + padW)
            pooledW -= 1;

        CV_Assert((pooledH - 1) * strideH < inH + padH);
        CV_Assert((pooledW - 1) * strideW < inW + padW);
    }
 }
 }