diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 0704f6b903..42191086e8 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -239,6 +239,10 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         bool computeMaxIdx;
         String padMode;
         bool ceilMode;
+        // If true, for average pooling with padding, divide every output region
+        // by the whole kernel area. Otherwise exclude zero-padded values and
+        // divide by the number of real values.
+        bool avePoolPaddedArea;
         // ROIPooling parameters.
         Size pooledSize;
         float spatialScale;
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 7cd8a341fb..2f5f0412af 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -106,6 +106,7 @@ public:
         setParamsFrom(params);
         ceilMode = params.get("ceil_mode", true);
         spatialScale = params.get("spatial_scale", 1);
+        avePoolPaddedArea = params.get("ave_pool_padded_area", true);
     }
 
 #ifdef HAVE_OPENCL
@@ -259,7 +260,7 @@ public:
         const Mat* src, *rois;
         Mat *dst, *mask;
         Size kernel, stride, pad;
-        String padMode;
+        bool avePoolPaddedArea;
         int nstripes;
         bool computeMaxIdx;
         std::vector<int> ofsbuf;
@@ -270,7 +271,7 @@ public:
                        computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
 
     static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
-                    Size stride, Size pad, String padMode, int poolingType, float spatialScale,
+                    Size stride, Size pad, bool avePoolPaddedArea, int poolingType, float spatialScale,
                     bool computeMaxIdx, int nstripes)
     {
         CV_Assert(src.isContinuous(), dst.isContinuous(),
@@ -289,7 +290,7 @@ public:
         p.kernel = kernel;
         p.stride = stride;
         p.pad = pad;
-        p.padMode = padMode;
+        p.avePoolPaddedArea = avePoolPaddedArea;
         p.nstripes = nstripes;
         p.computeMaxIdx = computeMaxIdx;
         p.poolingType = poolingType;
@@ -369,6 +370,7 @@ public:
                     yend = min(ystart + kernel_h, inp_height + pad_h);
                     srcData = src->ptr<float>(n, c);
                 }
+                int ydelta = yend - ystart;
                 ystart = max(ystart, 0);
                 yend = min(yend, inp_height);
                 float *dstData = dst->ptr<float>(n, c, y0);
@@ -532,14 +534,14 @@ public:
                 }
                 else if (poolingType == AVE)
                 {
-                    bool isSamePad = padMode == "SAME";
                     for( ; x0 < x1; x0++ )
                     {
                         int xstart = x0 * stride_w - pad_w;
                         int xend = min(xstart + kernel_w, inp_width + pad_w);
+                        int xdelta = xend - xstart;
                         xstart = max(xstart, 0);
                         xend = min(xend, inp_width);
-                        float inv_kernel_area = isSamePad ? (yend - ystart) * (xend - xstart) : kernel.area();
+                        float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta : ((yend - ystart) * (xend - xstart));
                         inv_kernel_area = 1.0 / inv_kernel_area;
 #if CV_SIMD128
                         if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width )
@@ -651,21 +653,21 @@ public:
     {
         const int nstripes = getNumThreads();
         Mat rois;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     void avePooling(Mat &src, Mat &dst)
     {
         const int nstripes = getNumThreads();
         Mat rois, mask;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
     {
         const int nstripes = getNumThreads();
         Mat mask;
-        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes);
+        PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
     }
 
     virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index bc112d3560..274a234ab8 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -1078,6 +1078,7 @@ void TFImporter::populateNet(Net dstNet)
         else if (type == "AvgPool")
         {
             layerParams.set("pool", "ave");
+            layerParams.set("ave_pool_padded_area", false);
 
             setKSize(layerParams, layer);
             setStrides(layerParams, layer);
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index b664d79582..db660ff516 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -559,7 +559,11 @@ struct TorchImporter
                 layerParams.set("indices_blob_id", tensorParams["indices"].first);
             }
             if (nnName == "SpatialAveragePooling")
+            {
                 layerParams.set("pool", "AVE");
+                layerParams.set("ave_pool_padded_area", scalarParams.has("count_include_pad") &&
+                                scalarParams.get<bool>("count_include_pad"));
+            }
             convertTorchKernelsParams(scalarParams, layerParams);
 
             CV_Assert(scalarParams.has("ceil_mode"));
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 856f06c5db..784c13c8b5 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -806,4 +806,29 @@ INSTANTIATE_TEST_CASE_P(Layer_Test, Crop, Combine(
 /*offset value*/ Values(3, 4)
 ));
 
+// Check that by default the average pooling layer does not count zero-padded values
+// in the normalization area.
+TEST(Layer_Test_Average_pooling_kernel_area, Accuracy)
+{
+    LayerParams lp;
+    lp.name = "testAvePool";
+    lp.type = "Pooling";
+    lp.set("kernel_size", 2);
+    lp.set("stride", 2);
+    lp.set("pool", "AVE");
+
+    Net net;
+    net.addLayerToPrev(lp.name, lp.type, lp);
+    // 1 2 | 3
+    // 4 5 | 6
+    // ----+--
+    // 7 8 | 9
+    Mat inp = (Mat_<float>(3, 3) << 1, 2, 3, 4, 5, 6, 7, 8, 9);
+    Mat target = (Mat_<float>(2, 2) << (1 + 2 + 4 + 5) / 4.f, (3 + 6) / 2.f, (7 + 8) / 2.f, 9);
+    Mat tmp = blobFromImage(inp);
+    net.setInput(blobFromImage(inp));
+    Mat out = net.forward();
+    normAssert(out, blobFromImage(target));
+}
+
 }} // namespace
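
For context, a minimal standalone sketch (not part of the patch) of how the new parameter could be exercised from user code, assuming the patch is applied. Only the parameter key "ave_pool_padded_area" comes from the diff; the layer name, the explicit pad value, and the input matrix are made-up illustrations.

// Sketch: compare average pooling with and without counting zero-padded cells.
// Assumes OpenCV built with the patch above; all values below are illustrative.
#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    using namespace cv;
    using namespace cv::dnn;

    Mat inp = (Mat_<float>(3, 3) << 1, 2, 3, 4, 5, 6, 7, 8, 9);

    for (int padded = 0; padded <= 1; ++padded)
    {
        LayerParams lp;
        lp.name = "avePool";      // hypothetical layer name
        lp.type = "Pooling";
        lp.set("pool", "AVE");
        lp.set("kernel_size", 2);
        lp.set("stride", 2);
        lp.set("pad", 1);         // explicit zero padding so the two modes differ
        lp.set("ave_pool_padded_area", padded == 1);

        Net net;
        net.addLayerToPrev(lp.name, lp.type, lp);
        net.setInput(blobFromImage(inp));
        Mat out = net.forward();  // 4D blob of shape 1x1xHxW

        // Wrap the single output plane in a 2D header for printing.
        Mat plane(out.size[2], out.size[3], CV_32F, out.ptr<float>(0, 0));
        std::cout << "ave_pool_padded_area=" << (padded == 1) << ":\n" << plane << std::endl;
    }
    return 0;
}

With "ave_pool_padded_area" set to true the border sums are divided by the full kernel area (the patch's default, matching Caffe); with false they are divided only by the number of in-bounds values, which is what the TensorFlow and (by default) Torch importers request above.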