From 983823468d79b60530e0c0740c85c8606c76db31 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko
Date: Thu, 2 Jul 2015 12:25:30 +0300
Subject: [PATCH] Some layer fixes. Added concat layer and avg. pooling to
 successfully run GoogLeNet.

Added concat layer, implemented average pooling to run GoogLeNet.
Fixed transpose error in FullyConnected layer (hotfix in softmax layer).
Added GoogLeNet test and updated AlexNet test (both nets now work fine).
---
 modules/dnn/src/layers/concat_layer.cpp       | 82 +++++++++++++++++++
 .../dnn/src/layers/fully_connected_layer.cpp  | 44 ++++++----
 modules/dnn/src/layers/pooling_layer.cpp      | 40 +++++++++
 modules/dnn/src/layers/softmax_layer.cpp      |  6 +-
 modules/dnn/test/test_alexnet.cpp             | 17 +---
 modules/dnn/test/test_googlenet.cpp           | 62 ++++++++++++++
 .../dnn/testdata/dnn/bvlc_alexnet.prototxt    |  4 +-
 7 files changed, 221 insertions(+), 34 deletions(-)
 create mode 100644 modules/dnn/src/layers/concat_layer.cpp
 create mode 100644 modules/dnn/test/test_googlenet.cpp

diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp
new file mode 100644
index 000000000..70eae6d12
--- /dev/null
+++ b/modules/dnn/src/layers/concat_layer.cpp
@@ -0,0 +1,82 @@
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include
+#include
+
+namespace cv
+{
+namespace dnn
+{
+    class ConcatLayer : public Layer
+    {
+        int axis;
+
+    public:
+        ConcatLayer(LayerParams& params);
+        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+    };
+
+
+    REGISTER_LAYER_CLASS(Concat, ConcatLayer)
+
+
+    ConcatLayer::ConcatLayer(LayerParams &params)
+    {
+        axis = params.get("axis", 1);
+        CV_Assert(axis == 0 || axis == 1);
+    }
+
+    void ConcatLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        CV_Assert(inputs.size() > 0);
+
+        int axisSum = 0;
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            Vec4i refShape = inputs[0]->shape();
+            Vec4i curShape = inputs[i]->shape();
+
+            for (int axisId = 0; axisId < 4; axisId++)
+            {
+                if (axisId != axis && refShape[axisId] != curShape[axisId])
+                    CV_Error(cv::Error::StsBadArg, "Inconsistent shape for ConcatLayer");
+            }
+
+            axisSum += curShape[axis];
+        }
+
+        Vec4i shape = inputs[0]->shape();
+        shape[axis] = axisSum;
+        outputs.resize(1);
+        outputs[0].create(shape);
+    }
+
+    void ConcatLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    {
+        float *dstPtr = outputs[0].ptr();
+
+        if (axis == 0)
+        {
+            for (size_t i = 0; i < inputs.size(); i++)
+            {
+                const float *srcPtr = inputs[i]->ptr();
+                memcpy(dstPtr, srcPtr, inputs[i]->total() * sizeof(float));
+                dstPtr += inputs[i]->total();
+            }
+        }
+        else
+        {
+            for (int n = 0; n < outputs[0].num(); n++)
+            {
+                for (size_t i = 0; i < inputs.size(); i++)
+                {
+                    Blob &inp = *inputs[i];
+                    memcpy(dstPtr, inp.ptr(n), inp.total(1) * sizeof(float));
+                    dstPtr += inp.total(1);
+                }
+            }
+        }
+    }
+}
+}
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 444061fbc..7195af5e9 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -1,18 +1,20 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
+#include
 
 namespace cv
 {
 namespace dnn
 {
-    //TODO: implement axis number parameter
     class FullyConnectedLayer : public Layer
     {
         bool bias;
         int numOutputs;
+        int axis;
 
-        int inC, inH, inW;
-        int inSize;
+        int innerSize;
+
+        void reshape(const Blob &inp, Blob &out);
 
     public:
         FullyConnectedLayer(LayerParams &params);
@@ -28,7 +30,9 @@ namespace dnn
     {
         numOutputs = params.get("num_output");
         bias = params.get("bias_term", true);
+        axis = params.get("axis", 1);
+        CV_Assert(0 <= axis && axis < 4);
 
         CV_Assert(params.learnedBlobs.size() >= 1);
         CV_Assert(!bias || (params.learnedBlobs.size() >= 2 && (int)params.learnedBlobs[1].total() == numOutputs));
 
@@ -44,36 +48,46 @@ namespace dnn
     {
         CV_Assert(inputs.size() > 0);
 
-        inC = inputs[0]->channels();
-        inH = inputs[0]->rows();
-        inW = inputs[0]->cols();
-        inSize = inC * inH * inW;
-
-        CV_Assert((size_t)inSize * (size_t)numOutputs == learnedParams[0].total());
+        innerSize = (int)inputs[0]->total(axis);
+        CV_Assert((size_t)innerSize * (size_t)numOutputs == learnedParams[0].total());
+        CV_Assert(learnedParams[0].rows() == numOutputs && learnedParams[0].cols() == innerSize);
 
         outputs.resize(inputs.size());
         for (size_t i = 0; i < inputs.size(); i++)
         {
             if (i != 0)
-                CV_Assert(inputs[i]->channels() == inC && inputs[i]->rows() == inH && inputs[i]->cols() == inW);
+                CV_Assert(inputs[i]->total(axis) == (size_t)innerSize);
 
-            outputs[i].create(inputs[i]->num(), numOutputs, 1, 1);
+            this->reshape(*inputs[i], outputs[i]);
         }
     }
 
+    void FullyConnectedLayer::reshape(const Blob &inp, Blob &out)
+    {
+        Vec4i inpShape = inp.shape();
+        Vec4i outShape = Vec4i::all(1);
+
+        for (int a = 0; a < axis; a++)
+            outShape[a] = inpShape[a];
+        outShape[3] = numOutputs;
+
+        out.create(outShape, inp.type());
+    }
+
     void FullyConnectedLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         for (size_t i = 0; i < inputs.size(); i++)
         {
-            int M = inputs[i]->num();
+            int M = inputs[i]->total(0, axis);
             int N = numOutputs;
-            int K = inSize;
+            int K = innerSize;
 
             Mat srcMat(M, K, CV_32F, inputs[i]->ptr());
-            Mat weights(K, N, CV_32F, learnedParams[0].ptr());
+            Mat weights(N, K, CV_32F, learnedParams[0].ptr());
             Mat dstMat(M, N, CV_32F, outputs[i].ptr());
 
-            cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat);
+            //important: Caffe stores weights as transposed array
+            cv::gemm(srcMat, weights, 1, noArray(), 0, dstMat, GEMM_2_T);
 
             if (bias)
             {
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index cadb114c6..5b7997ea0 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -27,6 +27,7 @@ namespace dnn
 
         void computeOutputShape(int inH, int inW);
         void maxPooling(Blob &input, Blob &output);
+        void avePooling(Blob &input, Blob &output);
 
     public:
         PoolingLayer(LayerParams &params);
@@ -85,6 +86,9 @@ namespace dnn
         case MAX:
             maxPooling(*inputs[ii], outputs[ii]);
            break;
+        case AVE:
+            avePooling(*inputs[ii], outputs[ii]);
+            break;
         default:
             CV_Error(cv::Error::StsNotImplemented, "Not implemented");
             break;
@@ -131,6 +135,42 @@ namespace dnn
         }
     }
 
+    void PoolingLayer::avePooling(Blob &input, Blob &output)
+    {
+        for (int n = 0; n < input.num(); ++n)
+        {
+            for (int c = 0; c < input.channels(); ++c)
+            {
+                float *srcData = input.ptr(n, c);
+                float *dstData = output.ptr(n, c);
+
+                for (int ph = 0; ph < pooledH; ++ph)
+                {
+                    for (int pw = 0; pw < pooledW; ++pw)
+                    {
+                        int hstart = ph * strideH - padH;
+                        int wstart = pw * strideW - padW;
+                        int hend = min(hstart + kernelH, inH + padH);
+                        int wend = min(wstart + kernelW, inW + padW);
+                        int pool_size = (hend - hstart) * (wend - wstart);
+                        hstart = max(hstart, 0);
+                        wstart = max(wstart, 0);
+                        hend = min(hend, inH);
+                        wend = min(wend, inW);
+
+                        dstData[ph * pooledW + pw] = 0.f;
+
+                        for (int h = hstart; h < hend; ++h)
+                            for (int w = wstart; w < wend; ++w)
+                                dstData[ph * pooledW + pw] += srcData[h * inW + w];
+
+                        dstData[ph * pooledW + pw] /= pool_size;
+                    }
+                }
+            }
+        }
+    }
+
     void PoolingLayer::computeOutputShape(int inH, int inW)
     {
         //Yeah, something strange Caffe scheme-)
diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp
index ef2079c71..b0d010276 100644
--- a/modules/dnn/src/layers/softmax_layer.cpp
+++ b/modules/dnn/src/layers/softmax_layer.cpp
@@ -8,6 +8,7 @@ namespace cv
 {
 namespace dnn
 {
+    //TODO: set default axis number to 1, and add custom shape length in FullyConnected
     class SoftMaxLayer : public Layer
     {
         int axis;
@@ -25,7 +26,8 @@ namespace dnn
 
     SoftMaxLayer::SoftMaxLayer(LayerParams &params)
     {
-        axis = params.get("axis", 1);
+        //hotfix!!!
+        axis = params.get("axis", 3);
         CV_Assert(0 <= axis && axis < 4);
     }
 
@@ -112,4 +114,4 @@ namespace dnn
     }
 }
-}
\ No newline at end of file
+}
diff --git a/modules/dnn/test/test_alexnet.cpp b/modules/dnn/test/test_alexnet.cpp
index 6c9cab51d..aad9966d0 100644
--- a/modules/dnn/test/test_alexnet.cpp
+++ b/modules/dnn/test/test_alexnet.cpp
@@ -53,23 +53,10 @@ TEST(Reproducibility_AlexNet, Accuracy)
 
     net.setBlob("data", inp);
 
-    net.forward("conv1");
-    normAssert(blobFromNPY(getTestFile("alexnet_conv1.npy")), net.getBlob("conv1"), "conv1");
-    //saveBlobToNPY(convBlob, getTestFile("alexnet_conv1_my.npy"));
-
-    net.forward("relu1");
-    normAssert(blobFromNPY(getTestFile("alexnet_relu1.npy")), net.getBlob("relu1"), "relu1");
-
-    net.forward("norm1");
-    normAssert(blobFromNPY(getTestFile("alexnet_norm1.npy")), net.getBlob("norm1"), "norm1");
-
     net.forward();
     Blob out = net.getBlob("prob");
-    Blob ref = blobFromNPY(getTestFile("alexnet.npy"));
-    std::cout << out.shape() << " vs " << ref.shape() << std::endl;
-    Mat mOut(1, 1000, CV_32F, ref.rawPtr());
-    Mat mRef(1, 1000, CV_32F, ref.rawPtr());
-    normAssert(mOut, mRef);
+    Blob ref = blobFromNPY(getTestFile("alexnet_prob.npy"));
+    normAssert(out, ref, "prob");
 }
 
 }
diff --git a/modules/dnn/test/test_googlenet.cpp b/modules/dnn/test/test_googlenet.cpp
new file mode 100644
index 000000000..3f1ce92e6
--- /dev/null
+++ b/modules/dnn/test/test_googlenet.cpp
@@ -0,0 +1,62 @@
+#include "test_precomp.hpp"
+#include "npy_blob.hpp"
+
+namespace cvtest
+{
+
+using namespace std;
+using namespace testing;
+using namespace cv;
+using namespace cv::dnn;
+
+static std::string getOpenCVExtraDir()
+{
+    return cvtest::TS::ptr()->get_data_path();
+}
+
+template<typename TStr>
+static std::string getTestFile(TStr filename)
+{
+    return (getOpenCVExtraDir() + "/dnn/") + filename;
+}
+
+inline void normAssert(InputArray ref, InputArray get, const char *comment = "")
+{
+    double normL1 = cvtest::norm(ref, get, NORM_L1) / ref.getMat().total();
+    EXPECT_LE(normL1, 0.0001) << comment;
+
+    double normInf = cvtest::norm(ref, get, NORM_INF);
+    EXPECT_LE(normInf, 0.001) << comment;
+}
+
+inline void normAssert(Blob ref, Blob test, const char *comment = "")
+{
+    normAssert(ref.getMatRef(), test.getMatRef(), comment);
+}
+
+TEST(Reproducibility_GoogLeNet, Accuracy)
+{
+    Net net;
+    {
+        Ptr<Importer> importer = createCaffeImporter(getTestFile("bvlc_googlenet.prototxt"), getTestFile("bvlc_googlenet.caffemodel"));
+        ASSERT_TRUE(importer != NULL);
+        importer->populateNet(net);
+    }
+
+    std::vector<Mat> inpMats(2);
+    inpMats[0] = imread(getTestFile("googlenet_0.png"));
+    inpMats[1] = imread(getTestFile("googlenet_1.png"));
+    ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
+
+    inpMats[0].convertTo(inpMats[0], CV_32F);
+    Blob inp(inpMats[0]);
+
+    net.setBlob("data", inp);
+    net.forward();
+
+    Blob out = net.getBlob("prob");
+    Blob ref = blobFromNPY(getTestFile("googlenet.npy"));
+    normAssert(out, ref);
+}
+
+}
diff --git a/modules/dnn/testdata/dnn/bvlc_alexnet.prototxt b/modules/dnn/testdata/dnn/bvlc_alexnet.prototxt
index 8a565803c..ecebda4cf 100644
--- a/modules/dnn/testdata/dnn/bvlc_alexnet.prototxt
+++ b/modules/dnn/testdata/dnn/bvlc_alexnet.prototxt
@@ -75,12 +75,12 @@ layer {
   name: "relu2"
   type: "ReLU"
   bottom: "conv2"
-  top: "conv2"
+  top: "relu2"
 }
 layer {
   name: "norm2"
   type: "LRN"
-  bottom: "conv2"
+  bottom: "relu2"
   top: "norm2"
   lrn_param {
     local_size: 5