@@ -1,5 +1,6 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
+#include "im2col.hpp"
 
 namespace cv
 {
@@ -8,19 +9,24 @@ namespace dnn
     //TODO: simultaneously convolution and bias addition for cache optimization
     class ConvolutionLayer : public Layer
     {
     protected:
         bool bias;
         int numOutput, group;
         int padH, padW;
+        int kerH, kerW;
         int strideH, strideW;
-        int kernelH, kernelW;
 
-        int inH, inW, inCn, kerSize;
-        int outH, outW;
-        int groupCn, groupCnOut;
+        int inpH, inpW, inpCn;
+        int outH, outW, outCn;
+        int topH, topW, topCn; //switched between inp/out on deconv/conv
+        int inpGroupCn, outGroupCn;
+        int ksize;
 
-        Mat srcColsMat, biasOnesMat;
+        Mat colMat, biasOnesMat;
 
-        void computeOutputShape(int inH, int inW);
+        inline bool is1x1() const;
+        virtual void computeInpOutShape(const Blob &inpBlob);
+        void im2col(Blob &inpBlob, int imNum, int cnGroup);
 
     public:
         ConvolutionLayer(LayerParams &params);
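Note: the renamed members above split the geometry into per-group quantities. `inpGroupCn` and `outGroupCn` are the channel counts a single group sees, and `ksize` is the row count of the unrolled-patch matrix `colMat`. A standalone sketch of that bookkeeping, with illustrative numbers only (not part of the patch):

    // Sketch (not part of the patch): how the group bookkeeping fits together.
    // Values are illustrative; the real layer reads them from blobs and params.
    #include <cstdio>

    int main()
    {
        int inpCn = 64, outCn = 128, group = 2;  // channel counts must divide by group
        int kerH = 3, kerW = 3;

        int inpGroupCn = inpCn / group;          // input channels seen by one group: 32
        int outGroupCn = outCn / group;          // output channels made by one group: 64
        int ksize = inpGroupCn * kerH * kerW;    // rows of colMat: 288

        std::printf("inpGroupCn=%d outGroupCn=%d ksize=%d\n",
                    inpGroupCn, outGroupCn, ksize);
        return 0;
    }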
@@ -28,13 +34,25 @@ namespace dnn
         void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
     };
 
+    class DeConvolutionLayer : public ConvolutionLayer
+    {
+    protected:
+        void computeInpOutShape(const Blob &inpBlob);
+        void col2im(Mat &dstMat);
+
+    public:
+        DeConvolutionLayer(LayerParams &params) : ConvolutionLayer(params) {}
+        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+    };
+
     REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer)
+    REGISTER_LAYER_CLASS(Deconvolution, DeConvolutionLayer)
 
 
     ConvolutionLayer::ConvolutionLayer(LayerParams &params)
     {
-        getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
+        getKernelParams(params, kerH, kerW, padH, padW, strideH, strideW);
 
         numOutput = params.get<int>("num_output");
         bias = params.get<bool>("bias_term", true);
@@ -44,8 +62,8 @@ namespace dnn
         CV_Assert(params.learnedBlobs.size() >= 1 && (!bias || params.learnedBlobs.size() >= 2));
         learnedParams.assign(params.learnedBlobs.begin(), params.learnedBlobs.begin() + (bias ? 2 : 1));
 
-        Blob &weightBlob = learnedParams[0];
-        CV_Assert(weightBlob.cols() == kernelW && weightBlob.rows() == kernelH && weightBlob.num() == numOutput);
+        const Blob &wgtBlob = learnedParams[0];
+        CV_Assert(wgtBlob.dims() == 4 && wgtBlob.cols() == kerW && wgtBlob.rows() == kerH && wgtBlob.num() == numOutput);
 
         if (bias)
         {
@@ -58,92 +76,141 @@ namespace dnn
     {
         CV_Assert(inputs.size() > 0);
 
-        Blob &weightBlob = learnedParams[0];
+        const Blob &inpBlob = *inputs[0];
+        CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F);
+        computeInpOutShape(inpBlob);
 
-        inCn = inputs[0]->channels();
-        CV_Assert(inCn % group == 0 && numOutput % group == 0 && weightBlob.channels() == inCn/group);
-        groupCnOut = numOutput / group;
-        groupCn = inCn / group;
+        CV_Assert(inpCn % group == 0 && outCn % group == 0);
+        CV_Assert(learnedParams[0].channels() == inpCn / group);
+        CV_Assert(learnedParams[0].num() == outCn);
 
-        inH = inputs[0]->rows();
-        inW = inputs[0]->cols();
-        computeOutputShape(inH, inW);
+        outGroupCn = outCn / group;
+        inpGroupCn = inpCn / group;
+        ksize = inpGroupCn * kerH * kerW;
 
         outputs.resize(inputs.size());
         for (size_t i = 0; i < inputs.size(); i++)
         {
-            CV_Assert(inputs[i]->rows() == inH && inputs[i]->cols() == inW && inputs[i]->channels() == inCn);
-            outputs[i].create(BlobShape(inputs[i]->num(), numOutput, outH, outW));
+            CV_Assert(inputs[i]->type() == inpBlob.type());
+            CV_Assert(inputs[i]->dims() == 4 && inputs[i]->channels() == inpBlob.channels());
+            CV_Assert(inputs[i]->rows() == inpBlob.rows() && inputs[i]->cols() == inpBlob.cols());
+
+            outputs[i].create(BlobShape(inputs[i]->num(), topCn, topH, topW));
         }
 
-        kerSize = kernelH * kernelW * groupCn;
-        srcColsMat.create(kerSize, outH * outW, CV_32F);
+        if (!is1x1())
+            colMat.create(ksize, outH * outW, inpBlob.type());
 
         if (bias)
-        {
-            biasOnesMat = Mat::ones(1, outH * outW, CV_32F);
-        }
+            biasOnesMat = Mat::ones(1, topH * topW, inpBlob.type());
     }
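Note: `allocate()` sizes the scratch buffers so that `forward()` can do one GEMM per (image, group) pair: `colMat` holds the unrolled patches (`ksize` rows, one column per output pixel), and `biasOnesMat` is a row of ones used later to broadcast the bias. A rough footprint sketch, with assumed illustrative sizes (not part of the patch):

    // Sketch (not part of the patch): the scratch-buffer shape chosen in allocate().
    // One GEMM against colMat yields every output pixel of one group at once.
    #include <cstdio>

    int main()
    {
        int inpGroupCn = 32, kerH = 3, kerW = 3;  // one group's channels and kernel
        int outH = 56, outW = 56;                 // from the usual conv shape formula

        int ksize = inpGroupCn * kerH * kerW;     // colMat rows
        int cols  = outH * outW;                  // colMat cols: one per output pixel
        std::printf("colMat: %d x %d (%.1f MB as float)\n",
                    ksize, cols, ksize * (double)cols * sizeof(float) / (1024 * 1024));
        return 0;
    }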
 
-    template <typename Dtype>
-    void im2col_cpu(const Dtype* data_im, const int channels,
-        const int height, const int width, const int kernel_h, const int kernel_w,
-        const int pad_h, const int pad_w,
-        const int stride_h, const int stride_w,
-        Dtype* data_col)
+    inline bool ConvolutionLayer::is1x1() const
     {
-        int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-        int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-        int channels_col = channels * kernel_h * kernel_w;
-        for (int c = 0; c < channels_col; ++c) {
-            int w_offset = c % kernel_w;
-            int h_offset = (c / kernel_w) % kernel_h;
-            int c_im = c / kernel_h / kernel_w;
-            for (int h = 0; h < height_col; ++h) {
-                for (int w = 0; w < width_col; ++w) {
-                    int h_pad = h * stride_h - pad_h + h_offset;
-                    int w_pad = w * stride_w - pad_w + w_offset;
-                    if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-                        data_col[(c * height_col + h) * width_col + w] =
-                            data_im[(c_im * height + h_pad) * width + w_pad];
-                    else
-                        data_col[(c * height_col + h) * width_col + w] = 0;
-                }
-            }
-        }
+        return (kerH == 1 && kerW == 1);
     }
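Note: the removed `im2col_cpu` template now lives in im2col.hpp; its indexing defines the column layout the GEMM below depends on. Row `c` of `data_col` fixes a (channel, kernel-offset) pair, and each column is one output location. A self-contained sketch reproducing that layout for a 3x3 input and 2x2 kernel, stride 1, no padding (not part of the patch):

    // Sketch (not part of the patch): the column layout produced by im2col_cpu,
    // following the indexing of the removed template above. A 1-channel 3x3
    // input with a 2x2 kernel yields a 4x4 column matrix.
    #include <cstdio>

    int main()
    {
        const int H = 3, W = 3, kH = 2, kW = 2;
        const int outH = H - kH + 1, outW = W - kW + 1;      // 2 x 2 output
        float im[H * W] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
        float col[kH * kW * outH * outW];

        for (int c = 0; c < kH * kW; ++c)                    // one row per kernel offset
            for (int h = 0; h < outH; ++h)
                for (int w = 0; w < outW; ++w)
                    col[(c * outH + h) * outW + w] =
                        im[(h + c / kW) * W + (w + c % kW)];

        for (int c = 0; c < kH * kW; ++c, std::printf("\n")) // prints 1 2 4 5 / 2 3 5 6 / ...
            for (int i = 0; i < outH * outW; ++i)
                std::printf("%3.0f", col[c * outH * outW + i]);
        return 0;
    }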
 
+    void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        Blob &wgtBlob = learnedParams[0];
+
+        for (size_t ii = 0; ii < outputs.size(); ii++)
+        {
+            Blob &inpBlob = *inputs[ii];
+            Blob &outBlob = outputs[ii];
+
+            for (int n = 0; n < inpBlob.num(); n++)
+            {
+                for (int g = 0; g < group; g++)
+                {
+                    im2col(inpBlob, n, g);
+
+                    Mat kerMat(outGroupCn, ksize, wgtBlob.type(), wgtBlob.ptrRaw(g*outGroupCn));
+                    Mat dstMat(outGroupCn, outH*outW, outBlob.type(), outBlob.ptrRaw(n, g*outGroupCn));
+
+                    cv::gemm(kerMat, colMat, 1, noArray(), 0, dstMat);
+
+                    if (bias)
+                    {
+                        float *biasPtr = learnedParams[1].ptrf() + g*outGroupCn;
+                        Mat biasMat(outGroupCn, 1, CV_32F, biasPtr);
+                        cv::gemm(biasMat, biasOnesMat, 1, dstMat, 1, dstMat);
+                    }
+                }
+            }
+        }
+    }
+
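Note: `forward()` computes each group's convolution as a single product, `kerMat` (outGroupCn x ksize) times `colMat` (ksize x outH*outW), then adds the bias with a second rank-1 GEMM: `biasMat` (outGroupCn x 1) times the ones row (1 x outH*outW), accumulated into `dstMat`. A minimal standalone illustration of the two `cv::gemm` calls, with toy shapes (not part of the patch; assumes OpenCV core is available):

    // Sketch (not part of the patch): convolution-as-GEMM plus the rank-1 bias
    // trick used in forward(). Shapes are tiny and illustrative.
    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        int outGroupCn = 2, ksize = 4, outHW = 3;

        cv::Mat ker(outGroupCn, ksize, CV_32F, cv::Scalar(1)); // flattened kernels
        cv::Mat col(ksize, outHW, CV_32F, cv::Scalar(2));      // im2col output
        cv::Mat dst;

        cv::gemm(ker, col, 1, cv::noArray(), 0, dst);          // dst = ker * col

        cv::Mat bias = (cv::Mat_<float>(outGroupCn, 1) << 0.5f, -0.5f);
        cv::Mat ones = cv::Mat::ones(1, outHW, CV_32F);
        cv::gemm(bias, ones, 1, dst, 1, dst);                  // dst += bias * ones

        std::cout << dst << std::endl;                         // each row offset by its bias
        return 0;
    }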
-    void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void ConvolutionLayer::im2col(Blob &inpBlob, int imNum, int cnGroup)
     {
+        uchar *srcPtr = inpBlob.ptrRaw(imNum, cnGroup*inpGroupCn);
+
+        if (is1x1())
+        {
+            colMat = Mat(ksize, inpBlob.rows()*inpBlob.cols(), inpBlob.type(), srcPtr);
+            return;
+        }
+
+        if (inpBlob.type() == CV_32F)
+            im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
+        if (inpBlob.type() == CV_64F)
+            im2col_cpu((double*)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)colMat.ptr());
     }
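Note: for 1x1 kernels `im2col()` skips the copy entirely: the unrolled matrix is identical to the input plane, so `colMat` just wraps the source memory in a `Mat` header. A tiny sketch of that aliasing behaviour (not part of the patch):

    // Sketch (not part of the patch): the zero-copy path taken for 1x1 kernels.
    // A Mat constructed over external data is only a header; no pixels are copied.
    #include <opencv2/core.hpp>
    #include <cassert>

    int main()
    {
        float data[6] = {1, 2, 3, 4, 5, 6};   // stands in for one group's input plane
        cv::Mat view(2, 3, CV_32F, data);     // header over 'data', like colMat for 1x1

        view.at<float>(0, 0) = 42.f;          // writes through to the original buffer
        assert(data[0] == 42.f);
        return 0;
    }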
 
+    void ConvolutionLayer::computeInpOutShape(const Blob &inpBlob)
     {
-        CV_Assert(inputs.size() == outputs.size());
+        inpH = inpBlob.rows();
+        inpW = inpBlob.cols();
+        inpCn = inpBlob.channels();
 
-        float *srcColPtr = srcColsMat.ptr<float>();
+        outH = (inpH + 2 * padH - kerH) / strideH + 1;
+        outW = (inpW + 2 * padW - kerW) / strideW + 1;
+        outCn = learnedParams[0].num();
+
+        topH = outH; topW = outW; topCn = outCn;
     }
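Note: `computeInpOutShape()` uses the standard convolution arithmetic, outH = (inpH + 2*padH - kerH) / strideH + 1. Two familiar configurations, evaluated in a standalone sketch (not part of the patch):

    // Sketch (not part of the patch): the conv shape formula above, evaluated
    // for a couple of well-known configurations.
    #include <cstdio>

    static int outSize(int in, int pad, int ker, int stride)
    {
        return (in + 2 * pad - ker) / stride + 1;
    }

    int main()
    {
        std::printf("%d\n", outSize(227, 0, 11, 4)); // AlexNet conv1: 55
        std::printf("%d\n", outSize(224, 1, 3, 1));  // 3x3/pad1/stride1: 224 (shape-preserving)
        return 0;
    }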
 
+    void DeConvolutionLayer::computeInpOutShape(const Blob &inpBlob)
+    {
+        outH = inpBlob.rows();
+        outW = inpBlob.cols();
+        outCn = inpBlob.channels();
+
+        inpH = strideH * (outH - 1) + kerH - 2 * padH;
+        inpW = strideW * (outW - 1) + kerW - 2 * padW;
+        inpCn = learnedParams[0].channels();
+
+        topH = inpH; topW = inpW; topCn = inpCn;
+    }
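Note: the deconvolution override swaps the roles: the layer's input plays the conv output, and the plane being reconstructed comes from solving the conv relation for the input size, inpH = strideH * (outH - 1) + kerH - 2*padH. The round trip is exact whenever the conv division was exact, as the following standalone check shows (not part of the patch):

    // Sketch (not part of the patch): the deconv shape relation is the conv
    // relation solved for the input size; round-trip check for one config.
    #include <cstdio>

    int main()
    {
        int kerH = 4, padH = 1, strideH = 2;

        int inpH = 64;                                      // hypothetical conv input
        int outH = (inpH + 2 * padH - kerH) / strideH + 1;  // conv: 32
        int backH = strideH * (outH - 1) + kerH - 2 * padH; // deconv: back to 64

        std::printf("inpH=%d outH=%d backH=%d\n", inpH, outH, backH);
        return 0;
    }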
 
+    void DeConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
+        Blob &wghtBlob = learnedParams[0];
+
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
-            Blob &input = *inputs[ii];
-            Blob &output = outputs[ii];
-            int num = input.num();
+            Blob &convBlob = *inputs[ii];
+            Blob &decnBlob = outputs[ii];
 
-            for (int n = 0; n < num; n++)
+            for (int n = 0; n < convBlob.num(); n++)
             {
                 for (int g = 0; g < group; g++)
                 {
-                    float *srcPtr = input.ptrf(n, g*groupCn);
-                    im2col_cpu(srcPtr, groupCn, inH, inW, kernelH, kernelW, padH, padW, strideH, strideW, srcColPtr);
+                    Mat dstMat(inpGroupCn, inpH*inpW, decnBlob.type(), decnBlob.ptrRaw(n, g*inpGroupCn));
+
+                    if (is1x1())
+                        colMat = dstMat;
 
-                    float *kerPtr = learnedParams[0].ptrf(g*groupCnOut);
-                    float *dstPtr = output.ptrf(n, g*groupCnOut);
+                    Mat convMat(outGroupCn, outH*outW, convBlob.type(), convBlob.ptrRaw(n, g*inpGroupCn));
+                    Mat wghtMat(outGroupCn, ksize, wghtBlob.type(), wghtBlob.ptrRaw(g*inpGroupCn));
+                    cv::gemm(wghtMat, convMat, 1, noArray(), 0, colMat, GEMM_1_T);
 
-                    Mat kerMat(groupCnOut, kerSize, CV_32F, kerPtr);
-                    Mat dstMat(groupCnOut, outH*outW, CV_32F, dstPtr);
-
-                    cv::gemm(kerMat, srcColsMat, 1, noArray(), 0, dstMat);
+                    col2im(dstMat);
 
                     if (bias)
                     {
-                        float *biasPtr = learnedParams[1].ptrf() + g*groupCnOut;
-                        Mat biasMat(groupCnOut, 1, CV_32F, biasPtr);
+                        float *biasPtr = learnedParams[1].ptrf() + g*outGroupCn;
+                        Mat biasMat(outGroupCn, 1, CV_32F, biasPtr);
                         cv::gemm(biasMat, biasOnesMat, 1, dstMat, 1, dstMat);
                     }
                 }
@@ -151,10 +218,14 @@ namespace dnn
             }
         }
     }
 
-    void ConvolutionLayer::computeOutputShape(int inH, int inW)
+    void DeConvolutionLayer::col2im(Mat &dstMat)
     {
-        outH = (inH + 2 * padH - kernelH) / strideH + 1;
-        outW = (inW + 2 * padW - kernelW) / strideW + 1;
+        if (is1x1()) return;
+
+        if (dstMat.type() == CV_32F)
+            col2im_cpu((float*)colMat.ptr(), inpCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float*)dstMat.ptr());
+        if (dstMat.type() == CV_64F)
+            col2im_cpu((double*)colMat.ptr(), inpCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)dstMat.ptr());
     }
 }
 }
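Note: `DeConvolutionLayer::forward()` runs the convolution pipeline backwards: the `GEMM_1_T` flag multiplies by the transposed weight matrix to produce columns, and `col2im` scatter-adds those columns back into the image plane, the adjoint of im2col's gather. The real kernel is `col2im_cpu` in im2col.hpp (not shown in this patch); the sketch below only mirrors the indexing of the earlier im2col sketch to show the scatter-add idea, so treat it as an assumption about its behaviour rather than the library code:

    // Sketch (not part of the patch): col2im as the adjoint of im2col -- where
    // im2col gathers a pixel into several columns, col2im sums those column
    // entries back into the pixel. Indexing mirrors the earlier im2col sketch.
    #include <cstdio>

    int main()
    {
        const int H = 3, W = 3, kH = 2, kW = 2;
        const int outH = H - kH + 1, outW = W - kW + 1;
        float col[kH * kW * outH * outW];
        for (int i = 0; i < kH * kW * outH * outW; ++i) col[i] = 1.f; // dummy columns

        float im[H * W] = {0};
        for (int c = 0; c < kH * kW; ++c)                    // gather reversed: scatter-add
            for (int h = 0; h < outH; ++h)
                for (int w = 0; w < outW; ++w)
                    im[(h + c / kW) * W + (w + c % kW)] +=
                        col[(c * outH + h) * outW + w];

        for (int i = 0; i < H * W; ++i)                      // prints 1 2 1 / 2 4 2 / 1 2 1:
            std::printf("%3.0f%s", im[i], (i % W == W - 1) ? "\n" : ""); // overlap counts
        return 0;
    }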