From a2e9bfbaf499eb91569353f8b50c17bdacb53276 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 26 Jan 2018 16:45:25 +0300 Subject: [PATCH] Fix padding for average pooling from TensorFlow --- modules/dnn/src/layers/pooling_layer.cpp | 17 +++++++------ modules/dnn/test/test_tf_importer.cpp | 31 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 8e5faf9b89..b451b4b931 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -228,6 +228,7 @@ public: const Mat* src, *rois; Mat *dst, *mask; Size kernel, stride, pad; + String padMode; int nstripes; bool computeMaxIdx; std::vector<int> ofsbuf; @@ -238,7 +239,7 @@ public: computeMaxIdx(0), poolingType(MAX), spatialScale(0) {} static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel, - Size stride, Size pad, int poolingType, float spatialScale, + Size stride, Size pad, String padMode, int poolingType, float spatialScale, bool computeMaxIdx, int nstripes) { CV_Assert(src.isContinuous(), dst.isContinuous(), @@ -257,6 +258,7 @@ public: p.kernel = kernel; p.stride = stride; p.pad = pad; + p.padMode = padMode; p.nstripes = nstripes; p.computeMaxIdx = computeMaxIdx; p.poolingType = poolingType; @@ -336,7 +338,6 @@ public: yend = min(ystart + kernel_h, inp_height + pad_h); srcData = src->ptr<float>(n, c); } - int ydelta = yend - ystart; ystart = max(ystart, 0); yend = min(yend, inp_height); float *dstData = dst->ptr<float>(n, c, y0); @@ -500,15 +501,15 @@ public: } else if (poolingType == AVE) { + bool isSamePad = padMode == "SAME"; for( ; x0 < x1; x0++ ) { int xstart = x0 * stride_w - pad_w; int xend = min(xstart + kernel_w, inp_width + pad_w); - int xdelta = xend - xstart; xstart = max(xstart, 0); xend = min(xend, inp_width); - float inv_kernel_area = 1.f/(ydelta*xdelta); - + float inv_kernel_area = isSamePad ?
(yend - ystart) * (xend - xstart) : kernel.area(); + inv_kernel_area = 1.0 / inv_kernel_area; #if CV_SIMD128 if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_w + kernel_w < inp_width ) { @@ -619,21 +620,21 @@ public: { const int nstripes = getNumThreads(); Mat rois; - PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes); + PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes); } void avePooling(Mat &src, Mat &dst) { const int nstripes = getNumThreads(); Mat rois, mask; - PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes); + PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes); } void roiPooling(const Mat &src, const Mat &rois, Mat &dst) { const int nstripes = getNumThreads(); Mat mask; - PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, type, spatialScale, computeMaxIdx, nstripes); + PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad, padMode, type, spatialScale, computeMaxIdx, nstripes); } virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 0b4dc6453b..7ddac16650 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -164,6 +164,7 @@ TEST(Test_TensorFlow, pooling) runTensorFlowNet("max_pool_even"); runTensorFlowNet("max_pool_odd_valid"); runTensorFlowNet("max_pool_odd_same"); + runTensorFlowNet("ave_pool_same"); } TEST(Test_TensorFlow, deconvolution) @@ -248,6 +249,36 @@ TEST(Test_TensorFlow, MobileNet_SSD) normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2); } +TEST(Test_TensorFlow, Inception_v2_SSD) +{ + std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false); + std::string model =
findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false); + + Net net = readNetFromTensorflow(model, proto); + Mat img = imread(findDataFile("dnn/street.png", false)); + Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false); + + net.setInput(blob); + // Output has shape 1x1xNx7, where N is the number of detections. + // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom] + Mat out = net.forward(); + out = out.reshape(1, out.total() / 7); + + Mat detections; + for (int i = 0; i < out.rows; ++i) + { + if (out.at<float>(i, 2) > 0.5) + detections.push_back(out.row(i).colRange(1, 7)); + } + + Mat ref = (Mat_<float>(5, 6) << 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729, + 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131, + 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015, + 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527, + 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384); + normAssert(detections, ref); +} + OCL_TEST(Test_TensorFlow, MobileNet_SSD) { throw SkipTestException("TODO: test is failed");