From 5510718381f8a35f2beb6c488b2ae1d8501cf0fc Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Thu, 4 Jul 2024 13:33:20 +0800 Subject: [PATCH] Merge pull request #25810 from fengyuentau:python/fix_parsing_3d_mat_in_dnn python: attempts to fix 3d mat parsing problem for dnn #25810 Fixes https://github.com/opencv/opencv/issues/25762 https://github.com/opencv/opencv/issues/23242 Relates https://github.com/opencv/opencv/issues/25763 https://github.com/opencv/opencv/issues/19091 Although `cv.Mat` has already been introduced to work around this problem, people do not know about it, and it leads to confusion with `numpy.array`. This patch adds a "switch" to turn off the auto multichannel feature when the API is from cv::dnn::Net (more specifically, `setInput`) and the parameter is of type `Mat`. This patch only changes three places in `pyopencv_generated_types_content.h`: ```.diff static PyObject* pyopencv_cv_dnn_dnn_Net_setInput(PyObject* self, PyObject* py_args, PyObject* kw) { ... - pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) && + pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) && ... } // I guess we also need to change this as one-channel blob is expected for param static PyObject* pyopencv_cv_dnn_dnn_Net_setParam(PyObject* self, PyObject* py_args, PyObject* kw) { ... - pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) ) + pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) ) ... - pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 0)) ) + pyopencv_to_safe(pyobj_blob, blob, ArgInfo("blob", 8)) ) ... } ``` Other generated bindings are unchanged, e.g. `dnn_SegmentationModel` and similar classes. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. 
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/core/include/opencv2/core/cvdef.h | 1 + modules/dnn/include/opencv2/dnn/dnn.hpp | 10 ++-- modules/dnn/misc/python/test/test_dnn.py | 61 ++++++++++++++++++++--- modules/python/src2/cv2.hpp | 5 +- modules/python/src2/cv2_convert.cpp | 2 +- modules/python/src2/cv2_convert.hpp | 34 +++++++++---- modules/python/src2/gen2.py | 5 ++ modules/python/src2/hdr_parser.py | 4 ++ 8 files changed, 100 insertions(+), 22 deletions(-) diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 748ecb9ece..ff1a3d7a5f 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -478,6 +478,7 @@ Cv64suf; #define CV_OUT #define CV_PROP #define CV_PROP_RW +#define CV_ND // Indicates that input data should be parsed into Mat without channels #define CV_WRAP #define CV_WRAP_AS(synonym) #define CV_WRAP_MAPPABLE(mappable) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index b516f80bde..0077ae4853 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -642,13 +642,13 @@ CV__DNN_INLINE_NS_BEGIN * @param outputName name for layer which output is needed to get * @details If @p outputName is empty, runs forward pass for the whole network. 
*/ - CV_WRAP void forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()); + CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs, const String& outputName = String()); /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. * @param outputBlobs contains blobs for first outputs of specified layers. * @param outBlobNames names for layers which outputs are needed to get */ - CV_WRAP void forward(OutputArrayOfArrays outputBlobs, + CV_WRAP void forward(CV_ND OutputArrayOfArrays outputBlobs, const std::vector& outBlobNames); /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames. @@ -727,7 +727,7 @@ CV__DNN_INLINE_NS_BEGIN * as: * \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f] */ - CV_WRAP void setInput(InputArray blob, const String& name = "", + CV_WRAP void setInput(CV_ND InputArray blob, const String& name = "", double scalefactor = 1.0, const Scalar& mean = Scalar()); /** @brief Sets the new value for the learned param of the layer. @@ -738,8 +738,8 @@ CV__DNN_INLINE_NS_BEGIN * @note If shape of the new blob differs from the previous shape, * then the following forward pass may fail. */ - CV_WRAP void setParam(int layer, int numParam, const Mat &blob); - CV_WRAP inline void setParam(const String& layerName, int numParam, const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); } + CV_WRAP void setParam(int layer, int numParam, CV_ND const Mat &blob); + CV_WRAP inline void setParam(const String& layerName, int numParam, CV_ND const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); } /** @brief Returns parameter blob of the layer. * @param layer name or id of the layer. 
diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index e3cc376dd2..8d7eed52af 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -455,10 +455,6 @@ class dnn_test(NewOpenCVTests): "Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") input = np.load(input_file) - # we have to expand the shape of input tensor because Python bindings cut 3D tensors to 2D - # it should be fixed in future. see : https://github.com/opencv/opencv/issues/19091 - # please remove `expand_dims` after that - input = np.expand_dims(input, axis=3) gold_output = np.load(output_file) for backend, target in self.dnnBackendsAndTargets: @@ -469,10 +465,63 @@ class dnn_test(NewOpenCVTests): net.setPreferableBackend(backend) net.setPreferableTarget(target) + # Check whether 3d shape is parsed correctly for setInput net.setInput(input) - real_output = net.forward() - normAssert(self, real_output, gold_output, "", getDefaultThreshold(target)) + # Case 0: test API `forward(const String& outputName = String()` + real_output = net.forward() # Retval is a np.array of shape [2, 5, 3] + normAssert(self, real_output, gold_output, "Case 1", getDefaultThreshold(target)) + + ''' + Pre-allocate output memory with correct shape. + Normally Python users do not use in this way, + but we have to test it since we design API in this way + ''' + # Case 1: a np.array with a string of output name. + # It tests API `forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()` + # when outputBlobs is a np.array and we expect it to be the only output. + real_output = np.empty([2, 5, 3], dtype=np.float32) + real_output = net.forward(real_output, "237") # Retval is a tuple with a np.array of shape [2, 5, 3] + normAssert(self, real_output, gold_output, "Case 1", getDefaultThreshold(target)) + + # Case 2: a tuple of np.array with a string of output name. 
+ # It tests API `forward(OutputArrayOfArrays outputBlobs, const String& outputName = String()` + # when outputBlobs is a container of several np.array and we expect to save all outputs accordingly. + real_output = tuple(np.empty([2, 5, 3], dtype=np.float32)) + real_output = net.forward(real_output, "237") # Retval is a tuple with a np.array of shape [2, 5, 3] + normAssert(self, real_output, gold_output, "Case 2", getDefaultThreshold(target)) + + # Case 3: a tuple of np.array with a string of output name. + # It tests API `forward(OutputArrayOfArrays outputBlobs, const std::vector& outBlobNames)` + real_output = tuple(np.empty([2, 5, 3], dtype=np.float32)) + # Note that it does not support parsing a list , e.g. ["237"] + real_output = net.forward(real_output, ("237")) # Retval is a tuple with a np.array of shape [2, 5, 3] + normAssert(self, real_output, gold_output, "Case 3", getDefaultThreshold(target)) + + def test_set_param_3d(self): + model_path = self.find_dnn_file('dnn/onnx/models/matmul_3d_init.onnx') + input_file = self.find_dnn_file('dnn/onnx/data/input_matmul_3d_init.npy') + output_file = self.find_dnn_file('dnn/onnx/data/output_matmul_3d_init.npy') + + input = np.load(input_file) + output = np.load(output_file) + + for backend, target in self.dnnBackendsAndTargets: + printParams(backend, target) + + net = cv.dnn.readNet(model_path) + + node_name = net.getLayerNames()[0] + w = net.getParam(node_name, 0) # returns the original tensor of three-dimensional shape + net.setParam(node_name, 0, w) # set param once again to see whether tensor is converted with correct shape + + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + + net.setInput(input) + res_output = net.forward() + + normAssert(self, output, res_output, "", getDefaultThreshold(target)) def test_scalefactor_assign(self): params = cv.dnn.Image2BlobParams() diff --git a/modules/python/src2/cv2.hpp b/modules/python/src2/cv2.hpp index 06080f1aa1..8cd0c2e4b1 100644 --- 
a/modules/python/src2/cv2.hpp +++ b/modules/python/src2/cv2.hpp @@ -46,19 +46,22 @@ private: static const uint32_t arg_outputarg_flag = 0x1; static const uint32_t arg_arithm_op_src_flag = 0x2; static const uint32_t arg_pathlike_flag = 0x4; + static const uint32_t arg_nd_mat_flag = 0x8; public: const char* name; bool outputarg; bool arithm_op_src; bool pathlike; + bool nd_mat; // more fields may be added if necessary ArgInfo(const char* name_, uint32_t arg_) : name(name_), outputarg((arg_ & arg_outputarg_flag) != 0), arithm_op_src((arg_ & arg_arithm_op_src_flag) != 0), - pathlike((arg_ & arg_pathlike_flag) != 0) {} + pathlike((arg_ & arg_pathlike_flag) != 0), + nd_mat((arg_ & arg_nd_mat_flag) != 0) {} private: ArgInfo(const ArgInfo&) = delete; diff --git a/modules/python/src2/cv2_convert.cpp b/modules/python/src2/cv2_convert.cpp index 35766b47c9..0626e42e53 100644 --- a/modules/python/src2/cv2_convert.cpp +++ b/modules/python/src2/cv2_convert.cpp @@ -173,7 +173,7 @@ bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo& info) CV_LOG_DEBUG(NULL, "Incoming ndarray '" << info.name << "': ndims=" << ndims << " _sizes=" << pycv_dumpArray(_sizes, ndims) << " _strides=" << pycv_dumpArray(_strides, ndims)); - bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX; + bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX && !info.nd_mat; if (pyopencv_Mat_TypePtr && PyObject_TypeCheck(o, pyopencv_Mat_TypePtr)) { bool wrapChannels = false; diff --git a/modules/python/src2/cv2_convert.hpp b/modules/python/src2/cv2_convert.hpp index 0c0fbd7b96..979425c3f9 100644 --- a/modules/python/src2/cv2_convert.hpp +++ b/modules/python/src2/cv2_convert.hpp @@ -340,21 +340,37 @@ static bool pyopencv_to_generic_vec(PyObject* obj, std::vector& value, const { return true; } - if (!PySequence_Check(obj)) + if (info.nd_mat && PyArray_Check(obj)) { - failmsg("Can't parse '%s'. 
Input argument doesn't provide sequence protocol", info.name); - return false; + /* + If obj is marked as nd mat and of array type, it is parsed to a single + mat in the target vector to avoid being split into multiple mats + */ + value.resize(1); + if (!pyopencv_to(obj, value.front(), info)) + { + failmsg("Can't parse '%s'. Array item has a wrong type", info.name); + return false; + } } - const size_t n = static_cast(PySequence_Size(obj)); - value.resize(n); - for (size_t i = 0; i < n; i++) + else // parse as sequence { - SafeSeqItem item_wrap(obj, i); - if (!pyopencv_to(item_wrap.item, value[i], info)) + if (!PySequence_Check(obj)) { - failmsg("Can't parse '%s'. Sequence item with index %lu has a wrong type", info.name, i); + failmsg("Can't parse '%s'. Input argument doesn't provide sequence protocol", info.name); return false; } + const size_t n = static_cast(PySequence_Size(obj)); + value.resize(n); + for (size_t i = 0; i < n; i++) + { + SafeSeqItem item_wrap(obj, i); + if (!pyopencv_to(item_wrap.item, value[i], info)) + { + failmsg("Can't parse '%s'. 
Sequence item with index %lu has a wrong type", info.name, i); + return false; + } + } } return true; } diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 29a91958ee..3249c57f82 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -488,6 +488,10 @@ class ArgInfo(object): return self.name + '_' return self.name + @property + def nd_mat(self): + return '/ND' in self._modifiers + @property def inputarg(self): return '/O' not in self._modifiers @@ -528,6 +532,7 @@ class ArgInfo(object): arg = 0x01 if self.outputarg else 0x0 arg += 0x02 if self.arithm_op_src_arg else 0x0 arg += 0x04 if self.pathlike else 0x0 + arg += 0x08 if self.nd_mat else 0x0 return "ArgInfo(\"%s\", %d)" % (self.name, arg) diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index fa2d0077d9..27f0fe0963 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -82,6 +82,10 @@ class CppHeaderParser(object): modlist = [] # pass 0: extracts the modifiers + if "CV_ND" in arg_str: + modlist.append("/ND") + arg_str = arg_str.replace("CV_ND", "") + if "CV_OUT" in arg_str: modlist.append("/O") arg_str = arg_str.replace("CV_OUT", "")