diff --git a/modules/dnn/src/layers.cpp b/modules/dnn/src/layers.cpp
deleted file mode 100644
index c50b5b5f0..000000000
--- a/modules/dnn/src/layers.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-#include "precomp.hpp"
-#include "layers.hpp"
-#include <math.h>
-#include <float.h>
-#include <algorithm>
-#include
-using std::max;
-using std::min;
-
-namespace cv
-{
-namespace dnn
-{
-
-struct ReLUFunctor
-{
-    float negative_slope;
-
-    ReLUFunctor(LayerParams &params)
-    {
-        if (params.has("negative_slope"))
-            negative_slope = params.get<float>("negative_slope");
-        else
-            negative_slope = 0.f;
-    }
-
-    inline float operator()(float x)
-    {
-        return (x >= 0) ? x : negative_slope * x;
-    }
-};
-
-struct TanHFunctor
-{
-    TanHFunctor(LayerParams &params) {}
-
-    inline float operator()(float x)
-    {
-        return tanh(x);
-    }
-};
-
-REGISTER_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
-REGISTER_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
-REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer)
-REGISTER_LAYER_CLASS(Pooling, PoolingLayer)
-REGISTER_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
-
-//////////////////////////////////////////////////////////////////////////
-
-
-static void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW)
-{
-    if (params.has("kernel_h") && params.has("kernel_w"))
-    {
-        kernelH = params.get<int>("kernel_h");
-        kernelW = params.get<int>("kernel_w");
-    }
-    else if (params.has("kernel_size"))
-    {
-        kernelH = kernelW = params.get<int>("kernel_size");
-    }
-    else
-    {
-        CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
-    }
-
-    if (params.has("pad_h") && params.has("pad_w"))
-    {
-        padH = params.get<int>("pad_h");
-        padW = params.get<int>("pad_w");
-    }
-    else
-    {
-        padH = padW = params.get<int>("pad", 0);
-    }
-
-    if (params.has("stride_h") && params.has("stride_w"))
-    {
-        strideH = params.get<int>("stride_h");
-        strideW = params.get<int>("stride_w");
-    }
-    else
-    {
-        strideH = strideW = params.get<int>("stride", 1);
-    }
-
-    CV_Assert(kernelH > 0 && kernelW > 0 && padH >= 0 && padW >= 0 && strideH > 0 & strideW > 0);
-}
-
-PoolingLayer::PoolingLayer(LayerParams &params)
-{
-    if (params.has("pool"))
-    {
-        String pool = params.get<String>("pool").toLowerCase();
-        if (pool == "max")
-            type = MAX;
-        else if (pool == "ave")
-            type = AVE;
-        else if (pool == "stochastic")
-            type = STOCHASTIC;
-        else
-            CV_Error(cv::Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
-    }
-    else
-    {
-        type = MAX;
-    }
-
-    getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
-}
-
-void PoolingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    CV_Assert(inputs.size() > 0);
-
-    inH = inputs[0]->cols();
-    inW = inputs[0]->rows();
-    computeOutputShape(inH, inW);
-
-    outputs.resize(inputs.size());
-    for (size_t i = 0; i < inputs.size(); i++)
-    {
-        CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW);
-        outputs[i].create(inputs[i]->num(), inputs[i]->channels(), pooledH, pooledW);
-    }
-}
-
-void PoolingLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    for (size_t ii = 0; ii < inputs.size(); ii++)
-    {
-        switch (type)
-        {
-        case MAX:
-            maxPooling(*inputs[ii], outputs[ii]);
-            break;
-        default:
-            CV_Error(cv::Error::StsNotImplemented, "Not implemented");
-            break;
-        }
-    }
-}
-
-void PoolingLayer::maxPooling(Blob &input, Blob &output)
-{
-    CV_DbgAssert(output.rows() == pooledH && output.cols() == pooledW);
-
-    for (int n = 0; n < input.num(); ++n)
-    {
-        for (int c = 0; c < input.channels(); ++c)
-        {
-            float *srcData = input.ptr(n, c);
-            float *dstData = output.ptr(n, c);
-
-            for (int ph = 0; ph < pooledH; ++ph)
-            {
-                for (int pw = 0; pw < pooledW; ++pw)
-                {
-                    int hstart = ph * strideH - padH;
-                    int wstart = pw * strideW - padW;
-                    int hend = min(hstart + kernelH, inH);
-                    int wend = min(wstart + kernelW, inW);
-                    hstart = max(hstart, 0);
-                    wstart = max(wstart, 0);
-                    const int pool_index = ph * pooledW + pw;
-                    float max_val = -FLT_MAX;
-
-                    for (int h = hstart; h < hend; ++h)
-                        for (int w = wstart; w < wend; ++w)
-                        {
-                            const int index = h * inW + w;
-                            if (srcData[index] > max_val)
-                                max_val = srcData[index];
-                        }
-
-                    dstData[pool_index] = max_val;
-                }
-            }
-        }
-    }
-}
-
-void PoolingLayer::computeOutputShape(int inH, int inW)
-{
-    //Yeah something strange Caffe scheme-)
-    pooledH = static_cast<int>(ceil(static_cast<float>(inH + 2 * padH - kernelH) / strideH)) + 1;
-    pooledW = static_cast<int>(ceil(static_cast<float>(inW + 2 * padW - kernelW) / strideW)) + 1;
-
-    if (padH || padW)
-    {
-        // If we have padding, ensure that the last pooling starts strictly
-        // inside the image (instead of at the padding); otherwise clip the last.
-        if ((pooledH - 1) * strideH >= inH + padH)
-            --pooledH;
-        if ((pooledW - 1) * strideW >= inW + padW)
-            --pooledW;
-        CV_Assert((pooledH - 1) * strideH < inH + padH);
-        CV_Assert((pooledW - 1) * strideW < inW + padW);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-ConvolutionLayer::ConvolutionLayer(LayerParams &params)
-{
-    getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
-
-    numOutput = params.get<int>("num_output");
-    bias = params.get<bool>("bias_term", true);
-    group = params.get<int>("group", 1);
-    CV_Assert(numOutput % group == 0);
-
-    CV_Assert(params.learnedBlobs.size() >= 1 && (!bias || params.learnedBlobs.size() >= 2));
-    learnedParams.assign(params.learnedBlobs.begin(), params.learnedBlobs.begin() + (bias ? 2 : 1));
-
-    Blob &weightBlob = learnedParams[0];
-    CV_Assert(weightBlob.cols() == kernelW && weightBlob.rows() == kernelH && weightBlob.num() == numOutput);
-
-    if (bias)
-    {
-        Blob &biasBlob = learnedParams[1];
-        CV_Assert(biasBlob.total() == numOutput);
-    }
-}
-
-void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    CV_Assert(inputs.size() > 0);
-
-    Blob &weightBlob = learnedParams[0];
-
-    inCn = inputs[0]->channels();
-    CV_Assert(inCn % group == 0 && weightBlob.channels() == inCn);
-
-    inH = inputs[0]->rows();
-    inW = inputs[0]->cols();
-    computeOutputShape(inH, inW);
-
-    outputs.resize(inputs.size());
-    for (size_t i = 0; i < inputs.size(); i++)
-    {
-        CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW && inputs[i]->channels() == inCn);
-        int num = inputs[i]->num();
-
-        outputs[i].create(num, numOutput, outH, outW);
-    }
-
-    colCn = kernelH * kernelW * inCn;
-    imColsMat.create(colCn, outH * outW, CV_32F);
-
-    if (bias)
-    {
-        biasOnesMat = Mat::ones(1, outH * outW, CV_32F);
-    }
-}
-
-
-template <typename Dtype>
-void im2col_cpu(const Dtype* data_im, const int channels,
-    const int height, const int width, const int kernel_h, const int kernel_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    Dtype* data_col)
-{
-    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-    int channels_col = channels * kernel_h * kernel_w;
-    for (int c = 0; c < channels_col; ++c) {
-        int w_offset = c % kernel_w;
-        int h_offset = (c / kernel_w) % kernel_h;
-        int c_im = c / kernel_h / kernel_w;
-        for (int h = 0; h < height_col; ++h) {
-            for (int w = 0; w < width_col; ++w) {
-                int h_pad = h * stride_h - pad_h + h_offset;
-                int w_pad = w * stride_w - pad_w + w_offset;
-                if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-                    data_col[(c * height_col + h) * width_col + w] =
-                        data_im[(c_im * height + h_pad) * width + w_pad];
-                else
-                    data_col[(c * height_col + h) * width_col + w] = 0;
-            }
-        }
-    }
-}
-
-void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    CV_Assert(inputs.size() == outputs.size());
-
-    float *colPtr = imColsMat.ptr<float>();
-    float *weigtsPtr = learnedParams[0].ptr();
-    float *biasPtr = (bias) ? learnedParams[1].ptr() : NULL;
-
-    CV_Assert(group == 1);
-
-    for (size_t ii = 0; ii < outputs.size(); ii++)
-    {
-        int num = inputs[ii]->num();
-
-        for (int n = 0; n < num; n++)
-        {
-            float *srcImPtr = inputs[ii]->ptr(n);
-            float *dstImPtr = outputs[ii].ptr(n);
-
-            im2col_cpu(srcImPtr, inCn, inH, inW, kernelH, kernelW, padH, padW, strideH, strideW, colPtr);
-
-            Mat weightsMat(numOutput, colCn, CV_32F, weigtsPtr);
-            Mat dstIm(numOutput, outH*outW, CV_32F, dstImPtr);
-
-            cv::gemm(weightsMat, imColsMat, 1, noArray(), 0, dstIm);
-
-            if (bias)
-            {
-                Mat biasMat(numOutput, 1, CV_32F, biasPtr);
-                cv::gemm(biasMat, biasOnesMat, 1, dstIm, 1, dstIm);
-            }
-        }
-    }
-}
-
-void ConvolutionLayer::computeOutputShape(int inH, int inW)
-{
-    outH = (inH + 2 * padH - kernelH) / strideH + 1;
-    outW = (inW + 2 * padW - kernelW) / strideW + 1;
-}
-
-//////////////////////////////////////////////////////////////////////////
-
-FullyConnectedLayer::FullyConnectedLayer(LayerParams &params)
-{
-    numOutputs = params.get<int>("num_output");
-    bias = params.get<bool>("bias_term", true);
-
-    CV_Assert(params.learnedBlobs.size() >= 1);
-    CV_Assert(!bias || (params.learnedBlobs.size() >= 2 && params.learnedBlobs[1].total() == numOutputs));
-
-    learnedParams.resize(bias ? 2 : 1);
-    learnedParams[0] = params.learnedBlobs[0];
-    if (bias)
-    {
-        learnedParams[1] = params.learnedBlobs[1];
-    }
-}
-
-void FullyConnectedLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    CV_Assert(inputs.size() > 0);
-
-    inC = inputs[0]->channels();
-    inH = inputs[0]->rows();
-    inW = inputs[0]->cols();
-    inSize = inC * inH * inW;
-
-    CV_Assert(inSize * numOutputs == learnedParams[0].total());
-
-    outputs.resize(inputs.size());
-    for (size_t i = 0; i < inputs.size(); i++)
-    {
-        if (i != 0)
-            CV_Assert(inputs[i]->channels() == inC && inputs[i]->rows() == inH && inputs[i]->cols() == inW);
-
-        outputs[i].create(inputs[i]->num(), numOutputs, 1, 1);
-    }
-}
-
-void FullyConnectedLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    for (size_t i = 0; i < inputs.size(); i++)
-    {
-        int M = inputs[i]->num();
-        int N = numOutputs;
-        int K = inSize;
-
-        Mat srcMat(M, K, CV_32F, inputs[i]->ptr());
-        Mat weights(K, N, CV_32F, learnedParams[0].ptr());
-        Mat dstMat(M, N, CV_32F, outputs[i].ptr());
-
-        cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat);
-
-        if (bias)
-        {
-            Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
-            Mat biasMat(1, N, CV_32F, learnedParams[1].ptr());
-            cv::gemm(biasOnesMat, biasMat, 1, dstMat, 1, dstMat);
-        }
-    }
-}
-
-}
-}
\ No newline at end of file
diff --git a/modules/dnn/src/layers.hpp b/modules/dnn/src/layers.hpp
deleted file mode 100644
index d65f53eee..000000000
--- a/modules/dnn/src/layers.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef __OPENCV_DNN_LAYERS_HPP__
-#define __OPENCV_DNN_LAYERS_HPP__
-#include <opencv2/dnn.hpp>
-
-namespace cv
-{
-namespace dnn
-{
-
-    template<typename Func>
-    class ElementWiseLayer : public Layer
-    {
-        Func func;
-    public:
-
-        ElementWiseLayer(LayerParams &_params) : func(_params) {}
-
-        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-        {
-            outputs.resize(inputs.size());
-            for (size_t i = 0; i < inputs.size(); i++)
-                outputs[i] = *inputs[i];
-        }
-
-        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-        {
-            CV_Assert(inputs.size() == outputs.size());
-
-            for (size_t i = 0; i < inputs.size(); i++)
-            {
-                CV_Assert(inputs[i]->ptr() == outputs[i].ptr());
-                float *data = outputs[i].ptr();
-                size_t size = outputs[i].total();
-
-                //Vec4i shape = outputs[0].shape();
-                //CV_Assert(pitch[i] == shape[i] * sizeof(float) );
-
-                for (size_t j = 0; j < size; j++)
-                    data[j] = func(data[j]);
-            }
-        }
-    };
-
-    class PoolingLayer : public Layer
-    {
-        enum
-        {
-            MAX,
-            AVE,
-            STOCHASTIC
-        };
-
-        int type;
-        int padH, padW;
-        int strideH, strideW;
-        int kernelH, kernelW;
-
-        int inH, inW;
-        int pooledH, pooledW;
-
-        void computeOutputShape(int inH, int inW);
-        void maxPooling(Blob &input, Blob &output);
-
-    public:
-        PoolingLayer(LayerParams &params);
-        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    };
-
-    class ConvolutionLayer : public Layer
-    {
-        bool bias;
-        int numOutput, group;
-        int padH, padW;
-        int strideH, strideW;
-        int kernelH, kernelW;
-
-        int inH, inW, inCn, colCn;
-        int outH, outW;
-
-        Mat imColsMat, biasOnesMat;
-
-        void computeOutputShape(int inH, int inW);
-
-    public:
-        ConvolutionLayer(LayerParams &params);
-        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    };
-
-    class FullyConnectedLayer : public Layer
-    {
-        bool bias;
-        int numOutputs;
-
-        int inC, inH, inW;
-        size_t inSize;
-
-    public:
-        FullyConnectedLayer(LayerParams &params);
-        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    };
-}
-}
-
-
-#endif
\ No newline at end of file
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
new file mode 100644
index 000000000..f1be3bdac
--- /dev/null
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -0,0 +1,157 @@
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+    //TODO: implement group parameter
+    //TODO: simultaneously convolution and bias addition for cache optimization
+    class ConvolutionLayer : public Layer
+    {
+        bool bias;
+        int numOutput, group;
+        int padH, padW;
+        int strideH, strideW;
+        int kernelH, kernelW;
+
+        int inH, inW, inCn, colCn;
+        int outH, outW;
+
+        Mat imColsMat, biasOnesMat;
+
+        void computeOutputShape(int inH, int inW);
+
+    public:
+        ConvolutionLayer(LayerParams &params);
+        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+    };
+
+
+    REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer)
+
+
+    ConvolutionLayer::ConvolutionLayer(LayerParams &params)
+    {
+        getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
+
+        numOutput = params.get<int>("num_output");
+        bias = params.get<bool>("bias_term", true);
+        group = params.get<int>("group", 1);
+        CV_Assert(numOutput % group == 0);
+
+        CV_Assert(params.learnedBlobs.size() >= 1 && (!bias || params.learnedBlobs.size() >= 2));
+        learnedParams.assign(params.learnedBlobs.begin(), params.learnedBlobs.begin() + (bias ? 2 : 1));
+
+        Blob &weightBlob = learnedParams[0];
+        CV_Assert(weightBlob.cols() == kernelW && weightBlob.rows() == kernelH && weightBlob.num() == numOutput);
+
+        if (bias)
+        {
+            Blob &biasBlob = learnedParams[1];
+            CV_Assert(biasBlob.total() == numOutput);
+        }
+    }
+
+    void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        CV_Assert(inputs.size() > 0);
+
+        Blob &weightBlob = learnedParams[0];
+
+        inCn = inputs[0]->channels();
+        CV_Assert(inCn % group == 0 && weightBlob.channels() == inCn);
+
+        inH = inputs[0]->rows();
+        inW = inputs[0]->cols();
+        computeOutputShape(inH, inW);
+
+        outputs.resize(inputs.size());
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW && inputs[i]->channels() == inCn);
+            int num = inputs[i]->num();
+
+            outputs[i].create(num, numOutput, outH, outW);
+        }
+
+        colCn = kernelH * kernelW * inCn;
+        imColsMat.create(colCn, outH * outW, CV_32F);
+
+        if (bias)
+        {
+            biasOnesMat = Mat::ones(1, outH * outW, CV_32F);
+        }
+    }
+
+    template <typename Dtype>
+    void im2col_cpu(const Dtype* data_im, const int channels,
+        const int height, const int width, const int kernel_h, const int kernel_w,
+        const int pad_h, const int pad_w,
+        const int stride_h, const int stride_w,
+        Dtype* data_col)
+    {
+        int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+        int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+        int channels_col = channels * kernel_h * kernel_w;
+        for (int c = 0; c < channels_col; ++c) {
+            int w_offset = c % kernel_w;
+            int h_offset = (c / kernel_w) % kernel_h;
+            int c_im = c / kernel_h / kernel_w;
+            for (int h = 0; h < height_col; ++h) {
+                for (int w = 0; w < width_col; ++w) {
+                    int h_pad = h * stride_h - pad_h + h_offset;
+                    int w_pad = w * stride_w - pad_w + w_offset;
+                    if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+                        data_col[(c * height_col + h) * width_col + w] =
+                            data_im[(c_im * height + h_pad) * width + w_pad];
+                    else
+                        data_col[(c * height_col + h) * width_col + w] = 0;
+                }
+            }
+        }
+    }
+
+    void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        CV_Assert(inputs.size() == outputs.size());
+
+        float *colPtr = imColsMat.ptr<float>();
+        float *weightsPtr = learnedParams[0].ptr();
+        float *biasPtr = (bias) ? learnedParams[1].ptr() : NULL;
+
+        CV_Assert(group == 1);
+
+        for (size_t ii = 0; ii < outputs.size(); ii++)
+        {
+            int num = inputs[ii]->num();
+
+            for (int n = 0; n < num; n++)
+            {
+                float *srcImPtr = inputs[ii]->ptr(n);
+                float *dstImPtr = outputs[ii].ptr(n);
+
+                im2col_cpu(srcImPtr, inCn, inH, inW, kernelH, kernelW, padH, padW, strideH, strideW, colPtr);
+
+                Mat weightsMat(numOutput, colCn, CV_32F, weightsPtr);
+                Mat dstIm(numOutput, outH*outW, CV_32F, dstImPtr);
+
+                cv::gemm(weightsMat, imColsMat, 1, noArray(), 0, dstIm);
+
+                if (bias)
+                {
+                    Mat biasMat(numOutput, 1, CV_32F, biasPtr);
+                    cv::gemm(biasMat, biasOnesMat, 1, dstIm, 1, dstIm);
+                }
+            }
+        }
+    }
+
+    void ConvolutionLayer::computeOutputShape(int inH, int inW)
+    {
+        outH = (inH + 2 * padH - kernelH) / strideH + 1;
+        outW = (inW + 2 * padW - kernelW) / strideW + 1;
+    }
+}
+}
\ No newline at end of file
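For reference, the layer above reduces each convolution to a single matrix product: im2col unrolls every receptive field into one column, giving a buffer with kernelH*kernelW*inCn rows and outH*outW columns, which is multiplied by the numOutput x (kernelH*kernelW*inCn) weight matrix. A small self-contained sketch of that shape bookkeeping follows; the sizes are illustrative only and are not taken from the patch.

    #include <cstdio>

    // Same floor-style formula as ConvolutionLayer::computeOutputShape.
    static int convOutSize(int in, int kernel, int pad, int stride)
    {
        return (in + 2 * pad - kernel) / stride + 1;
    }

    int main()
    {
        // Illustrative sizes: 3-channel 224x224 input, 64 filters of 7x7, pad 3, stride 2.
        int inCn = 3, inH = 224, inW = 224;
        int numOutput = 64, kernel = 7, pad = 3, stride = 2;

        int outH = convOutSize(inH, kernel, pad, stride);
        int outW = convOutSize(inW, kernel, pad, stride);
        int colCn = kernel * kernel * inCn;  // rows of the im2col buffer

        // GEMM: [numOutput x colCn] * [colCn x outH*outW] -> [numOutput x outH*outW]
        std::printf("output: %dx%d, col buffer: %dx%d, weights: %dx%d\n",
                    outH, outW, colCn, outH * outW, numOutput, colCn);
        return 0;
    }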
diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
new file mode 100644
index 000000000..0023c67eb
--- /dev/null
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -0,0 +1,74 @@
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include <math.h>
+
+namespace cv
+{
+namespace dnn
+{
+
+    template<typename Func>
+    class ElementWiseLayer : public Layer
+    {
+        Func func;
+    public:
+
+        ElementWiseLayer(LayerParams &_params) : func(_params) {}
+
+        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+        {
+            outputs.resize(inputs.size());
+            for (size_t i = 0; i < inputs.size(); i++)
+                outputs[i] = *inputs[i]; //no data copy
+        }
+
+        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+        {
+            CV_Assert(inputs.size() == outputs.size());
+
+            for (size_t i = 0; i < inputs.size(); i++)
+            {
+                CV_Assert(inputs[i]->ptr() == outputs[i].ptr());
+                float *data = outputs[i].ptr();
+                size_t size = outputs[i].total();
+
+                for (size_t j = 0; j < size; j++)
+                    data[j] = func(data[j]);
+            }
+        }
+    };
+
+
+    struct ReLUFunctor
+    {
+        float negative_slope;
+
+        ReLUFunctor(LayerParams &params)
+        {
+            if (params.has("negative_slope"))
+                negative_slope = params.get<float>("negative_slope");
+            else
+                negative_slope = 0.f;
+        }
+
+        inline float operator()(float x)
+        {
+            return (x >= 0) ? x : negative_slope * x;
+        }
+    };
+
+    struct TanHFunctor
+    {
+        TanHFunctor(LayerParams &params) {}
+
+        inline float operator()(float x)
+        {
+            return tanh(x);
+        }
+    };
+
+    REGISTER_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
+    REGISTER_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
+
+}
+}
\ No newline at end of file
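Because ElementWiseLayer is parameterized by a functor, adding an activation only needs a struct with a LayerParams constructor and operator(), plus a REGISTER_LAYER_CLASS line. The sketch below applies a hypothetical SigmoidFunctor (not part of this patch) in place over a plain buffer, mirroring the loop in ElementWiseLayer::forward without depending on Blob or LayerParams.

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Hypothetical functor with the same shape as ReLUFunctor/TanHFunctor
    // (the real ones take LayerParams; omitted here to keep the sketch standalone).
    struct SigmoidFunctor
    {
        inline float operator()(float x) const { return 1.f / (1.f + std::exp(-x)); }
    };

    // In-place application, mirroring ElementWiseLayer::forward's inner loop.
    template <typename Func>
    void applyInPlace(std::vector<float> &data, Func func)
    {
        for (size_t j = 0; j < data.size(); j++)
            data[j] = func(data[j]);
    }

    int main()
    {
        std::vector<float> v;
        v.push_back(-2.f); v.push_back(0.f); v.push_back(2.f);
        applyInPlace(v, SigmoidFunctor());
        std::printf("%f %f %f\n", v[0], v[1], v[2]);
        return 0;
    }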
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
new file mode 100644
index 000000000..3c605e4d9
--- /dev/null
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -0,0 +1,87 @@
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+    //TODO: implement axis number parameter
+    class FullyConnectedLayer : public Layer
+    {
+        bool bias;
+        int numOutputs;
+
+        int inC, inH, inW;
+        size_t inSize;
+
+    public:
+        FullyConnectedLayer(LayerParams &params);
+        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+    };
+
+
+    REGISTER_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
+
+
+    FullyConnectedLayer::FullyConnectedLayer(LayerParams &params)
+    {
+        numOutputs = params.get<int>("num_output");
+        bias = params.get<bool>("bias_term", true);
+
+        CV_Assert(params.learnedBlobs.size() >= 1);
+        CV_Assert(!bias || (params.learnedBlobs.size() >= 2 && params.learnedBlobs[1].total() == numOutputs));
+
+        learnedParams.resize(bias ? 2 : 1);
+        learnedParams[0] = params.learnedBlobs[0];
+        if (bias)
+        {
+            learnedParams[1] = params.learnedBlobs[1];
+        }
+    }
+
+    void FullyConnectedLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        CV_Assert(inputs.size() > 0);
+
+        inC = inputs[0]->channels();
+        inH = inputs[0]->rows();
+        inW = inputs[0]->cols();
+        inSize = inC * inH * inW;
+
+        CV_Assert(inSize * numOutputs == learnedParams[0].total());
+
+        outputs.resize(inputs.size());
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            if (i != 0)
+                CV_Assert(inputs[i]->channels() == inC && inputs[i]->rows() == inH && inputs[i]->cols() == inW);
+
+            outputs[i].create(inputs[i]->num(), numOutputs, 1, 1);
+        }
+    }
+
+    void FullyConnectedLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            int M = inputs[i]->num();
+            int N = numOutputs;
+            int K = inSize;
+
+            Mat srcMat(M, K, CV_32F, inputs[i]->ptr());
+            Mat weights(K, N, CV_32F, learnedParams[0].ptr());
+            Mat dstMat(M, N, CV_32F, outputs[i].ptr());
+
+            cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat);
+
+            if (bias)
+            {
+                Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
+                Mat biasMat(1, N, CV_32F, learnedParams[1].ptr());
+                cv::gemm(biasOnesMat, biasMat, 1, dstMat, 1, dstMat);
+            }
+        }
+    }
+}
}
\ No newline at end of file
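The inner-product forward pass above is two cv::gemm calls: src (M x K, with M the batch size and K = inC*inH*inW) times weights (K x N, N = num_output), then an M x 1 column of ones times the 1 x N bias row added onto the result. A minimal self-contained version of that arithmetic on random data follows; the sizes are illustrative.

    #include <opencv2/core.hpp>
    #include <cstdio>

    int main()
    {
        int M = 2, K = 8, N = 4;  // batch, flattened input size, num_output

        cv::Mat src(M, K, CV_32F), weights(K, N, CV_32F), bias(1, N, CV_32F), dst;
        cv::randu(src, -1, 1);
        cv::randu(weights, -1, 1);
        cv::randu(bias, -1, 1);

        // dst = src * weights, then dst += ones(M,1) * bias, as in FullyConnectedLayer::forward.
        cv::gemm(src, weights, 1, cv::noArray(), 0, dst);
        cv::gemm(cv::Mat::ones(M, 1, CV_32F), bias, 1, dst, 1, dst);

        std::printf("dst: %dx%d\n", dst.rows, dst.cols);
        return 0;
    }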
diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp
new file mode 100644
index 000000000..e0e141c96
--- /dev/null
+++ b/modules/dnn/src/layers/layers_common.cpp
@@ -0,0 +1,48 @@
+#include "layers_common.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+
+void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW)
+{
+    if (params.has("kernel_h") && params.has("kernel_w"))
+    {
+        kernelH = params.get<int>("kernel_h");
+        kernelW = params.get<int>("kernel_w");
+    }
+    else if (params.has("kernel_size"))
+    {
+        kernelH = kernelW = params.get<int>("kernel_size");
+    }
+    else
+    {
+        CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
+    }
+
+    if (params.has("pad_h") && params.has("pad_w"))
+    {
+        padH = params.get<int>("pad_h");
+        padW = params.get<int>("pad_w");
+    }
+    else
+    {
+        padH = padW = params.get<int>("pad", 0);
+    }
+
+    if (params.has("stride_h") && params.has("stride_w"))
+    {
+        strideH = params.get<int>("stride_h");
+        strideW = params.get<int>("stride_w");
+    }
+    else
+    {
+        strideH = strideW = params.get<int>("stride", 1);
+    }
+
+    CV_Assert(kernelH > 0 && kernelW > 0 && padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
+}
+
+}
+}
\ No newline at end of file
diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp
new file mode 100644
index 000000000..117c3b676
--- /dev/null
+++ b/modules/dnn/src/layers/layers_common.hpp
@@ -0,0 +1,15 @@
+#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
+#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
+#include <opencv2/dnn.hpp>
+
+namespace cv
+{
+namespace dnn
+{
+
+void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW);
+
+}
+}
+
+#endif
\ No newline at end of file
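getKernelParams follows the Caffe convention: the explicit pair (kernel_h, kernel_w) wins over kernel_size, (pad_h, pad_w) over pad (default 0), and (stride_h, stride_w) over stride (default 1); the kernel size has no default and raises an error when absent. The standalone sketch below mimics that precedence with a plain std::map standing in for LayerParams; the map, its keys, and the values are illustrative only.

    #include <cstdio>
    #include <map>
    #include <string>

    // Simplified stand-in for the LayerParams lookup: per-axis key wins, then the
    // shared shorthand, then the default. (The real function requires the _h/_w
    // keys as a pair; this sketch checks a single key for brevity.)
    static int resolve(const std::map<std::string, int> &p,
                       const std::string &axisKey, const std::string &sharedKey, int def)
    {
        std::map<std::string, int>::const_iterator it = p.find(axisKey);
        if (it != p.end()) return it->second;
        it = p.find(sharedKey);
        if (it != p.end()) return it->second;
        return def;
    }

    int main()
    {
        std::map<std::string, int> params;
        params["kernel_size"] = 3;  // square-kernel shorthand
        params["stride_h"] = 2;     // explicit pair overrides "stride"
        params["stride_w"] = 2;

        std::printf("kernel=%dx%d stride=%dx%d pad=%dx%d\n",
                    resolve(params, "kernel_h", "kernel_size", -1),
                    resolve(params, "kernel_w", "kernel_size", -1),
                    resolve(params, "stride_h", "stride", 1),
                    resolve(params, "stride_w", "stride", 1),
                    resolve(params, "pad_h", "pad", 0),
                    resolve(params, "pad_w", "pad", 0));
        return 0;
    }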
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
new file mode 100644
index 000000000..c4bf4062b
--- /dev/null
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -0,0 +1,153 @@
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include <float.h>
+#include <algorithm>
+using std::max;
+using std::min;
+
+namespace cv
+{
+namespace dnn
+{
+    class PoolingLayer : public Layer
+    {
+        enum
+        {
+            MAX,
+            AVE,
+            STOCHASTIC
+        };
+
+        int type;
+        int padH, padW;
+        int strideH, strideW;
+        int kernelH, kernelW;
+
+        int inH, inW;
+        int pooledH, pooledW;
+
+        void computeOutputShape(int inH, int inW);
+        void maxPooling(Blob &input, Blob &output);
+
+    public:
+        PoolingLayer(LayerParams &params);
+        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+    };
+
+
+    REGISTER_LAYER_CLASS(Pooling, PoolingLayer)
+
+
+    PoolingLayer::PoolingLayer(LayerParams &params)
+    {
+        if (params.has("pool"))
+        {
+            String pool = params.get<String>("pool").toLowerCase();
+            if (pool == "max")
+                type = MAX;
+            else if (pool == "ave")
+                type = AVE;
+            else if (pool == "stochastic")
+                type = STOCHASTIC;
+            else
+                CV_Error(cv::Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
+        }
+        else
+        {
+            type = MAX;
+        }
+
+        getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
+    }
+
+    void PoolingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        CV_Assert(inputs.size() > 0);
+
+        inH = inputs[0]->rows();
+        inW = inputs[0]->cols();
+        computeOutputShape(inH, inW);
+
+        outputs.resize(inputs.size());
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW);
+            outputs[i].create(inputs[i]->num(), inputs[i]->channels(), pooledH, pooledW);
+        }
+    }
+
+    void PoolingLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        for (size_t ii = 0; ii < inputs.size(); ii++)
+        {
+            switch (type)
+            {
+            case MAX:
+                maxPooling(*inputs[ii], outputs[ii]);
+                break;
+            default:
+                CV_Error(cv::Error::StsNotImplemented, "Not implemented");
+                break;
+            }
+        }
+    }
+
+    void PoolingLayer::maxPooling(Blob &input, Blob &output)
+    {
+        CV_DbgAssert(output.rows() == pooledH && output.cols() == pooledW);
+
+        for (int n = 0; n < input.num(); ++n)
+        {
+            for (int c = 0; c < input.channels(); ++c)
+            {
+                float *srcData = input.ptr(n, c);
+                float *dstData = output.ptr(n, c);
+
+                for (int ph = 0; ph < pooledH; ++ph)
+                {
+                    for (int pw = 0; pw < pooledW; ++pw)
+                    {
+                        int hstart = ph * strideH - padH;
+                        int wstart = pw * strideW - padW;
+                        int hend = min(hstart + kernelH, inH);
+                        int wend = min(wstart + kernelW, inW);
+                        hstart = max(hstart, 0);
+                        wstart = max(wstart, 0);
+                        const int pool_index = ph * pooledW + pw;
+                        float max_val = -FLT_MAX;
+
+                        for (int h = hstart; h < hend; ++h)
+                            for (int w = wstart; w < wend; ++w)
+                            {
+                                const int index = h * inW + w;
+                                if (srcData[index] > max_val)
+                                    max_val = srcData[index];
+                            }
+
+                        dstData[pool_index] = max_val;
+                    }
+                }
+            }
+        }
+    }
+
+    void PoolingLayer::computeOutputShape(int inH, int inW)
+    {
+        //Yeah, something strange Caffe scheme-)
+        pooledH = static_cast<int>(ceil(static_cast<float>(inH + 2 * padH - kernelH) / strideH)) + 1;
+        pooledW = static_cast<int>(ceil(static_cast<float>(inW + 2 * padW - kernelW) / strideW)) + 1;
+
+        if (padH || padW)
+        {
+            // If we have padding, ensure that the last pooling starts strictly
+            // inside the image (instead of at the padding); otherwise clip the last.
+            if ((pooledH - 1) * strideH >= inH + padH)
+                --pooledH;
+            if ((pooledW - 1) * strideW >= inW + padW)
+                --pooledW;
+            CV_Assert((pooledH - 1) * strideH < inH + padH);
+            CV_Assert((pooledW - 1) * strideW < inW + padW);
+        }
+    }
+}
+}
\ No newline at end of file
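PoolingLayer::computeOutputShape follows Caffe's ceil-based rule, so a window that only partially overlaps the input still yields an output cell, and the extra clause drops a window that would start entirely inside the padding. A standalone sketch of that computation for one axis (the numbers are illustrative):

    #include <cmath>
    #include <cstdio>

    // Mirrors PoolingLayer::computeOutputShape for one axis: ceil, then clip when padded.
    static int pooledSize(int in, int kernel, int pad, int stride)
    {
        int pooled = static_cast<int>(std::ceil(static_cast<float>(in + 2 * pad - kernel) / stride)) + 1;
        if (pad && (pooled - 1) * stride >= in + pad)
            --pooled;  // the last window must start inside the image, not in the padding
        return pooled;
    }

    int main()
    {
        // 6-wide axis, 3-wide window, stride 2, no pad: ceil(3/2)+1 = 3 (a floor formula would give 2).
        std::printf("no pad: %d\n", pooledSize(6, 3, 0, 2));
        // 3-wide axis, 2-wide window, stride 2, pad 1: ceil(3/2)+1 = 3, clipped to 2 by the pad rule.
        std::printf("pad 1:  %d\n", pooledSize(3, 2, 1, 2));
        return 0;
    }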