From 50462dcdc6d0f1cccae49f0fa54ccddd2a0e4a6c Mon Sep 17 00:00:00 2001
From: YashasSamaga <yashas_2010@yahoo.com>
Date: Mon, 13 Sep 2021 20:44:33 +0530
Subject: [PATCH] fix effrank assert to allow input effrank <= output effrank

---
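Notes:

Padding can increase a tensor's effective rank (e.g. padding [1, 1, 1, 3]
out to [1, 1, 3, 3]), so asserting that the input and output effective
ranks are equal was too strict; the assert is relaxed to input <= output,
and the output's effective rank now drives the padding loop.

Below is a minimal standalone sketch of the effective-rank notion assumed
here (the real get_effective_rank helper is defined elsewhere in cuda4dnn;
this illustrative version treats effective rank as the rank remaining
after leading unit axes are dropped):

    #include <cstddef>
    #include <vector>

    std::size_t effective_rank(const std::vector<std::size_t>& shape) {
        std::size_t leading_ones = 0;
        while (leading_ones < shape.size() && shape[leading_ones] == 1)
            ++leading_ones;                  /* skip leading axes of size 1 */
        return shape.size() - leading_ones;  /* rank of the remaining axes */
    }

    /* effective_rank({1, 1, 1, 3}) == 1   (the input in the comment below)
     * effective_rank({1, 1, 3, 3}) == 2   (the output)
     */
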
 modules/dnn/src/cuda4dnn/primitives/padding.hpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/modules/dnn/src/cuda4dnn/primitives/padding.hpp b/modules/dnn/src/cuda4dnn/primitives/padding.hpp
index cbf75131a1..ce2a5c3c47 100644
--- a/modules/dnn/src/cuda4dnn/primitives/padding.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/padding.hpp
@@ -52,14 +52,19 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
             auto output = output_wrapper->getSpan();
 
-            auto effective_rank = get_effective_rank(input);
-            CV_Assert(get_effective_rank(input) == get_effective_rank(output));
-
             /* suppose we require padding for the first spatial axis (H in NCHW or D in NCDHW)
              *
              * there could be a case where the batch axis, channel axis, and the first spatial axis are all one
              * this would result in effective rank being less than the number of axes requiring padding
              */
+            /* the effective rank of the input may be smaller than the effective rank of the output, but it can never be larger
+             * input:  [1, 1, 1, 3]; effective rank = 1
+             * output: [1, 1, 3, 3]; effective rank = 2
+             *
+             * hence, we use the effective rank of the output tensor for the padding operation
+             */
+            auto effective_rank = get_effective_rank(output);
+            CV_Assert(get_effective_rank(input) <= effective_rank);
             effective_rank = std::max(effective_rank, dstRanges.size());
 
             for (int i = effective_rank - dstRanges.size(); i < effective_rank; i++)