From 6e593cd1f0d95cca51892132ea30928728a80b18 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Wed, 27 Sep 2017 18:58:50 +0300
Subject: [PATCH] Swap dimensions of deconvolution kernel

---
 .../dnn/include/opencv2/dnn/all_layers.hpp   |  1 +
 modules/dnn/src/layers/convolution_layer.cpp | 73 +++++++++----------
 modules/dnn/src/tensorflow/tf_importer.cpp   |  4 +-
 modules/dnn/src/torch/torch_importer.cpp     |  5 +-
 modules/dnn/test/test_halide_layers.cpp      |  4 +-
 5 files changed, 39 insertions(+), 48 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index cf47c70a4e..11e8221e9d 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -199,6 +199,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     public:
         Size kernel, stride, pad, dilation, adjustPad;
         String padMode;
+        int numOutput;
     };
 
     class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 68c71bc758..8440662367 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -252,24 +252,13 @@ public:
         }
 
         Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
-
+        Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
+        Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
         Halide::Expr kc = r.z;
-        if (group > 1)
+        for (int i = 1; i < group; ++i)
         {
-            int outCnBound = outGroupCn;
-            int inpChBound = inpGroupCn;
-            Halide::Expr shift = select(c < outCnBound, 0, inpChBound);
-            for (int i = 2; i < group; ++i)
-            {
-                outCnBound += outGroupCn;
-                inpChBound += inpGroupCn;
-                shift = select(c < outCnBound, shift, inpChBound);
-            }
-            kc += shift;
+            kc = select(c < outGroupCn * i, kc, inpGroupCn * i + r.z);
         }
-
-        Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
-        Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
         Halide::Expr topExpr = sum(padded_input(kx, ky, kc, n) * weights(r.x, r.y, r.z, c));
         if (hasBias())
         {
@@ -278,7 +267,6 @@ public:
             topExpr += bias(c);
         }
         top(x, y, c, n) = topExpr;
-        Ptr<BackendNode> pp(new HalideBackendNode({ padded_input, top }));
         return Ptr<BackendNode>(new HalideBackendNode({ padded_input, top }));
 #endif  // HAVE_HALIDE
         return Ptr<BackendNode>();
@@ -793,7 +781,7 @@ public:
         int inpH = inpShape[2];
         int inpW = inpShape[3];
         int outCn = outShape[1];
-        int ngroups = inpCn / blobs[0].size[1];
+        int ngroups = inpCn / blobs[0].size[0];
         int outGroupCn = outCn / ngroups;
         int ksize = outGroupCn * kernel.height * kernel.width;
         return shape(ksize, inpH * inpW);
     }
@@ -804,7 +792,7 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
-        CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]);
+        CV_Assert(!hasBias() || blobs[1].total() == (size_t)numOutput);
         CV_Assert(inputs.size() != 0);
 
         int inpCn = inputs[0][1];
@@ -813,12 +801,13 @@ public:
 
         int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
         int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
-        int outCn = blobs[0].size[0];
+        int outCn = numOutput;
 
-        int ngroups = inpCn / blobs[0].size[1];
+        CV_Assert(outCn % blobs[0].size[1] == 0);
+        int ngroups = outCn / blobs[0].size[1];
 
         CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
-        CV_Assert(blobs[0].size[0] == outCn && blobs[0].size[1] == inpCn / ngroups);
+        CV_Assert(blobs[0].size[0] == inpCn);
 
         int dims[] = {inputs[0][0], outCn, outH, outW};
         outputs.resize(inputs.size(), shape(dims));
@@ -1073,7 +1062,7 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        int outCn = blobs[0].size[0];
+        int outCn = numOutput;
         int inpCn = inputs[0]->size[1];
         bool is1x1flag = is1x1();
         int nstripes = getNumThreads();
@@ -1086,9 +1075,9 @@ public:
 
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
-            int ngroups = inpCn / blobs[0].size[1];
-            int inpGroupCn = blobs[0].size[1];
-            int outGroupCn = outCn / ngroups;
+            int ngroups = outCn / blobs[0].size[1];
+            int inpGroupCn = inpCn / ngroups;
+            int outGroupCn = blobs[0].size[1];
             const Mat& inp = *inputs[ii];
             Mat& out = outputs[ii];
             int numImg = inp.size[0];
@@ -1126,18 +1115,16 @@ public:
 #ifdef HAVE_HALIDE
         Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
 
-        int inW, inH, inC, inN, outC = blobs[0].size[0];
+        int inW, inH, inC, inN;
         getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
-
-        if (inC / blobs[0].size[1] != 1)
-            CV_Error(cv::Error::StsNotImplemented,
-                     "Halide backend for Deconvolution with group > 1 is not implemented");
+        const int outGroupCn = blobs[0].size[1];
+        const int group = numOutput / outGroupCn;
+        const int inpGroupCn = blobs[0].size[0] / group;
 
         Halide::Var x("x"), y("y"), c("c"), n("n");
         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
         Halide::Func padded_input(name + "_constant_exterior");
-        auto weights = wrapToHalideBuffer(blobs[0], {kernel.width,
-                                                     kernel.height, outC, inC});
+        auto weights = wrapToHalideBuffer(blobs[0]);
 
         Halide::Func dilated_input("dilated_input");
         dilated_input(x, y, c, n) = 0.0f;
@@ -1153,13 +1140,21 @@ public:
                                                0, inC, 0, inN);
         padded_input(x, y, c, n) = bounded(x, y, c, n);
 
-        Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inC);
-        Halide::Expr topExpr = sum(
-            padded_input(x + pad.width - r.x, y + pad.height - r.y, r.z, n) *
-            weights(r.x, r.y, c, r.z));
+        Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
+        Halide::Expr kx = x + pad.width - r.x;
+        Halide::Expr ky = y + pad.height - r.y;
+        Halide::Expr kInC = r.z;
+        Halide::Expr kOutC = c;
+        for (int i = 1; i < group; ++i)
+        {
+            kInC = select(c < outGroupCn * i, kInC, inpGroupCn * i + r.z);
+            kOutC = select(c < outGroupCn * i, kOutC, c - outGroupCn * i);
+        }
+        Halide::Expr topExpr = sum(padded_input(kx, ky, kInC, n) *
+                                   weights(r.x, r.y, kOutC, kInC));
         if (hasBias())
         {
-            auto bias = wrapToHalideBuffer(blobs[1], {outC});
+            auto bias = wrapToHalideBuffer(blobs[1], {numOutput});
             topExpr += bias(c);
         }
         top(x, y, c, n) = topExpr;
@@ -1194,13 +1189,13 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const LayerParams &params)
                                l->dilation.width, l->padMode);
 
     bool bias = params.get<bool>("bias_term", true);
-    int numOutput = params.get<int>("num_output");
+    l->numOutput = params.get<int>("num_output");
     int ngroups = params.get<int>("group", 1);
 
     l->adjustPad.height = params.get<int>("adj_h", 0);
    l->adjustPad.width = params.get<int>("adj_w", 0);
 
-    CV_Assert(numOutput % ngroups == 0);
+    CV_Assert(l->numOutput % ngroups == 0);
     CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
     CV_Assert(l->adjustPad.width < l->stride.width &&
               l->adjustPad.height < l->stride.height);
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 67565cc591..30edd347dc 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -1038,13 +1038,11 @@ void TFImporter::populateNet(Net dstNet)
         }
 
         kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
-        // Swap just numbers of input and output channels.
-        std::swap(layerParams.blobs[0].size[0], layerParams.blobs[0].size[1]);
 
         const int* kshape = layerParams.blobs[0].size.p;
         layerParams.set("kernel_h", kshape[2]);
         layerParams.set("kernel_w", kshape[3]);
-        layerParams.set("num_output", kshape[0]);
+        layerParams.set("num_output", kshape[1]);
 
         setStrides(layerParams, layer);
         setPadding(layerParams, layer);
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index 56c55d6639..3c3edce7dd 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -796,10 +796,7 @@ struct TorchImporter : public ::cv::dnn::Importer
             layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
             layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
 
-            Mat weights = tensorParams["weight"].second;
-            CV_Assert(weights.dims == 4);
-            int reorderedShape[] = { weights.size[1], weights.size[0], weights.size[2], weights.size[3] };
-            layerParams.blobs.push_back(weights.reshape(1, 4, reorderedShape));
+            layerParams.blobs.push_back(tensorParams["weight"].second);
 
             bool bias = tensorParams.count("bias");
             layerParams.set("bias_term", bias);
diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp
index 79f767a134..d0fb5f60fd 100644
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@@ -107,7 +107,7 @@ TEST_P(Deconvolution, Accuracy)
     Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
     bool hasBias = get<6>(GetParam());
 
-    Mat weights({outChannels, inChannels / group, kernel.height, kernel.width}, CV_32F);
+    Mat weights({inChannels, outChannels / group, kernel.height, kernel.width}, CV_32F);
     randu(weights, -1.0f, 1.0f);
 
     LayerParams lp;
@@ -139,7 +139,7 @@
 
 INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
 /*in channels, out channels, group*/
-             Values(Vec3i(6, 4, 1), Vec3i(6, 9, 1)),
+             Values(Vec3i(6, 4, 1), Vec3i(6, 9, 3)),
 /*in size*/  Values(Size(5, 6)),
 /*kernel*/   Values(Size(3, 1), Size(1, 3)),
 /*pad*/      Values(Size(1, 0), Size(0, 1)),
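
Note for reviewers (not part of the patch): with this change applied, deconvolution weights are laid out as [input channels, output channels / group, kernel height, kernel width], and the TensorFlow/Torch importers no longer transpose the first two axes. Below is a minimal C++ sketch of feeding a Deconvolution layer by hand under the new layout, mirroring the updated Deconvolution.Accuracy test above; the helper name buildDeconvNet and the random weight filler are illustrative assumptions, not code from this patch.

    #include <opencv2/dnn.hpp>
    using namespace cv;
    using namespace cv::dnn;

    // Builds a one-layer net whose Deconvolution weights use the new
    // [inChannels, outChannels / group, kernel_h, kernel_w] layout.
    // Requires outChannels % group == 0, and an input blob whose channel
    // count equals inChannels (checked by the new CV_Assert in the patch).
    Net buildDeconvNet(int inChannels, int outChannels, int group, Size kernel)
    {
        Mat weights({inChannels, outChannels / group, kernel.height, kernel.width}, CV_32F);
        randu(weights, -1.0f, 1.0f);  // random filler, as in the test

        LayerParams lp;
        lp.set("kernel_w", kernel.width);
        lp.set("kernel_h", kernel.height);
        lp.set("num_output", outChannels);
        lp.set("group", group);
        lp.set("bias_term", false);
        lp.blobs.push_back(weights);
        lp.type = "Deconvolution";
        lp.name = "deconvLayer";

        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);
        return net;
    }

A net built this way can then be exercised with net.setInput(blob) followed by net.forward(); the Halide tests construct the layer the same way and compare backend outputs against the CPU path.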