diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 5e5e295de3..829d57271d 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -108,6 +108,21 @@ CV__DNN_INLINE_NS_BEGIN DNN_TARGET_NPU, }; + /** + * @brief Enum of data layout for model inference. + * @see Image2BlobParams + */ + enum DataLayout + { + DNN_LAYOUT_UNKNOWN = 0, + DNN_LAYOUT_ND = 1, //!< OpenCV data layout for 2D data. + DNN_LAYOUT_NCHW = 2, //!< OpenCV data layout for 4D data. + DNN_LAYOUT_NCDHW = 3, //!< OpenCV data layout for 5D data. + DNN_LAYOUT_NHWC = 4, //!< Tensorflow-like data layout for 4D data. + DNN_LAYOUT_NDHWC = 5, //!< Tensorflow-like data layout for 5D data. + DNN_LAYOUT_PLANAR = 6, //!< Tensorflow-like data layout, it should only be used at tf or tflite model parsing. + }; + CV_EXPORTS std::vector< std::pair > getAvailableBackends(); CV_EXPORTS_W std::vector getAvailableTargets(dnn::Backend be); @@ -1111,10 +1126,10 @@ CV__DNN_INLINE_NS_BEGIN /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center, * subtract @p mean values, scales values by @p scalefactor, swap Blue and Red channels. * @param image input image (with 1-, 3- or 4-channels). + * @param scalefactor multiplier for @p image values. * @param size spatial size for output image * @param mean scalar with mean values which are subtracted from channels. Values are intended * to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true. - * @param scalefactor multiplier for @p image values. * @param swapRB flag which indicates that swap first and last channels * in 3-channel image is necessary. * @param crop flag which indicates whether image will be cropped after resize or not @@ -1123,6 +1138,9 @@ CV__DNN_INLINE_NS_BEGIN * dimension in @p size and another one is equal or larger. Then, crop from the center is performed. 
* If @p crop is false, direct resize without cropping and preserving aspect ratio is performed. * @returns 4-dimensional Mat with NCHW dimensions order. + * + * @note + * The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor. */ CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, @@ -1153,6 +1171,9 @@ CV__DNN_INLINE_NS_BEGIN * dimension in @p size and another one is equal or larger. Then, crop from the center is performed. * If @p crop is false, direct resize without cropping and preserving aspect ratio is performed. * @returns 4-dimensional Mat with NCHW dimensions order. + * + * @note + * The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor. */ CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0, Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, @@ -1167,6 +1188,74 @@ CV__DNN_INLINE_NS_BEGIN const Scalar& mean = Scalar(), bool swapRB=false, bool crop=false, int ddepth=CV_32F); + /** + * @brief Enum of image processing mode. + * To facilitate the specialized pre-processing requirements of the dnn model. + * For example, the `letter box` often used in the Yolo series of models. + * @see Image2BlobParams + */ + enum ImagePaddingMode + { + DNN_PMODE_NULL = 0, //!< Default. Resize to required input size without extra processing. + DNN_PMODE_CROP_CENTER = 1, //!< Image will be cropped after resize. + DNN_PMODE_LETTERBOX = 2, //!< Resize image to the desired size while preserving the aspect ratio of original image. + }; + + /** @brief Processing params of image to blob. + * + * It includes all possible image processing operations and corresponding parameters. + * + * @see blobFromImageWithParams + * + * @note + * The order and usage of `scalefactor` and `mean` are (input - mean) * scalefactor. 
+ * The order and usage of `scalefactor`, `size`, `mean`, `swapRB`, and `ddepth` are consistent + * with the function of @ref blobFromImage. + */ + struct CV_EXPORTS_W_SIMPLE Image2BlobParams + { + CV_WRAP Image2BlobParams(); + CV_WRAP Image2BlobParams(const Scalar& scalefactor, const Size& size = Size(), const Scalar& mean = Scalar(), + bool swapRB = false, int ddepth = CV_32F, DataLayout datalayout = DNN_LAYOUT_NCHW, + ImagePaddingMode mode = DNN_PMODE_NULL); + + CV_PROP_RW Scalar scalefactor; //!< Multiplier for input image values. + CV_PROP_RW Size size; //!< Spatial size for output image. + CV_PROP_RW Scalar mean; //!< Scalar with mean values which are subtracted from channels. + CV_PROP_RW bool swapRB; //!< Flag which indicates that the first and last channels are swapped. + CV_PROP_RW int ddepth; //!< Depth of output blob. Choose CV_32F or CV_8U. + CV_PROP_RW DataLayout datalayout; //!< Order of output dimensions. Choose DNN_LAYOUT_NCHW or DNN_LAYOUT_NHWC. + CV_PROP_RW ImagePaddingMode paddingmode; //!< Image padding mode. @see ImagePaddingMode. + }; + + /** @brief Creates 4-dimensional blob from image with given params. + * + * @details This function is an extension of @ref blobFromImage to meet more image preprocess needs. + * Given an input image and preprocessing parameters, the function outputs the blob. + * + * @param image input image (with 1-, 3- or 4-channels). + * @param param struct of Image2BlobParams, contains all parameters needed by processing of image to blob. + * @return 4-dimensional Mat. + */ + CV_EXPORTS_W Mat blobFromImageWithParams(InputArray image, const Image2BlobParams& param = Image2BlobParams()); + + /** @overload */ + CV_EXPORTS_W void blobFromImageWithParams(InputArray image, OutputArray blob, const Image2BlobParams& param = Image2BlobParams()); + + /** @brief Creates 4-dimensional blob from series of images with given params. 
+ * + * @details This function is an extension of @ref blobFromImages to meet more image preprocess needs. + * Given input images and preprocessing parameters, the function outputs the blob. + * + * @param images input images (all with 1-, 3- or 4-channels). + * @param param struct of Image2BlobParams, contains all parameters needed by processing of image to blob. + * @returns 4-dimensional Mat. + */ + CV_EXPORTS_W Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& param = Image2BlobParams()); + + /** @overload */ + CV_EXPORTS_W void blobFromImagesWithParams(InputArrayOfArrays images, OutputArray blob, const Image2BlobParams& param = Image2BlobParams()); + /** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure * (std::vector). * @param[in] blob_ 4 dimensional array (images, channels, height, width) in floating point precision (CV_32F) from diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 82d07f402b..54b51b4133 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -119,7 +119,7 @@ class dnn_test(NewOpenCVTests): inp = np.random.standard_normal([1, 2, 10, 11]).astype(np.float32) net.setInput(inp) net.forward() - except BaseException as e: + except BaseException: return False return True @@ -153,6 +153,41 @@ class dnn_test(NewOpenCVTests): target = target.transpose(2, 0, 1).reshape(1, 3, height, width) # to NCHW normAssert(self, blob, target) + def test_blobFromImageWithParams(self): + np.random.seed(324) + + width = 6 + height = 7 + stddev = np.array([0.2, 0.3, 0.4]) + scalefactor = 1.0/127.5 * stddev + mean = (10, 20, 30) + + # Test argument names. 
+ img = np.random.randint(0, 255, [4, 5, 3]).astype(np.uint8) + + param = cv.dnn.Image2BlobParams() + param.scalefactor = scalefactor + param.size = (6, 7) + param.mean = mean + param.swapRB=True + param.datalayout = cv.dnn.DNN_LAYOUT_NHWC + + blob = cv.dnn.blobFromImageWithParams(img, param) + blob_args = cv.dnn.blobFromImageWithParams(img, cv.dnn.Image2BlobParams(scalefactor=scalefactor, size=(6, 7), mean=mean, + swapRB=True, datalayout=cv.dnn.DNN_LAYOUT_NHWC)) + normAssert(self, blob, blob_args) + + target2 = cv.resize(img, (width, height), interpolation=cv.INTER_LINEAR).astype(np.float32) + target2 = target2[:,:,[2, 1, 0]] # BGR2RGB + target2[:,:,0] -= mean[0] + target2[:,:,1] -= mean[1] + target2[:,:,2] -= mean[2] + + target2[:,:,0] *= scalefactor[0] + target2[:,:,1] *= scalefactor[1] + target2[:,:,2] *= scalefactor[2] + target2 = target2.reshape(1, height, width, 3) # to NHWC + normAssert(self, blob, target2) def test_model(self): img_path = self.find_dnn_file("dnn/street.png") diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp index aa4a6eadf1..aad067ee37 100644 --- a/modules/dnn/src/dnn_utils.cpp +++ b/modules/dnn/src/dnn_utils.cpp @@ -11,8 +11,17 @@ namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN +Image2BlobParams::Image2BlobParams():scalefactor(Scalar::all(1.0)), size(Size()), mean(Scalar()), swapRB(false), ddepth(CV_32F), + datalayout(DNN_LAYOUT_NCHW), paddingmode(DNN_PMODE_NULL) +{} -Mat blobFromImage(InputArray image, double scalefactor, const Size& size, +Image2BlobParams::Image2BlobParams(const Scalar& scalefactor_, const Size& size_, const Scalar& mean_, bool swapRB_, + int ddepth_, DataLayout datalayout_, ImagePaddingMode mode_): + scalefactor(scalefactor_), size(size_), mean(mean_), swapRB(swapRB_), ddepth(ddepth_), + datalayout(datalayout_), paddingmode(mode_) +{} + +Mat blobFromImage(InputArray image, const double scalefactor, const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth) { 
CV_TRACE_FUNCTION(); @@ -42,16 +51,55 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth) { CV_TRACE_FUNCTION(); - CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U"); - if (ddepth == CV_8U) - { - CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth"); - CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); - } + Image2BlobParams param(Scalar::all(scalefactor), size, mean_, swapRB, ddepth); + if (crop) + param.paddingmode = DNN_PMODE_CROP_CENTER; + blobFromImagesWithParams(images_, blob_, param); +} + +Mat blobFromImageWithParams(InputArray image, const Image2BlobParams& param) +{ + CV_TRACE_FUNCTION(); + Mat blob; + blobFromImageWithParams(image, blob, param); + return blob; +} + +void blobFromImageWithParams(InputArray image, OutputArray blob, const Image2BlobParams& param) +{ + CV_TRACE_FUNCTION(); + std::vector images(1, image.getMat()); + blobFromImagesWithParams(images, blob, param); +} +Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams& param) +{ + CV_TRACE_FUNCTION(); + Mat blob; + blobFromImagesWithParams(images, blob, param); + return blob; +} + +void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, const Image2BlobParams& param) +{ + CV_TRACE_FUNCTION(); + CV_CheckType(param.ddepth, param.ddepth == CV_32F || param.ddepth == CV_8U, + "Blob depth should be CV_32F or CV_8U"); + + Size size = param.size; std::vector images; images_.getMatVector(images); CV_Assert(!images.empty()); + + int nch = images[0].channels(); + Scalar scalefactor = param.scalefactor; + + if (param.ddepth == CV_8U) + { + CV_Assert(scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth"); + CV_Assert(param.mean == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); + } + for (size_t i = 0; 
i < images.size(); i++) { Size imgSize = images[i].size(); @@ -59,73 +107,122 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef size = imgSize; if (size != imgSize) { - if (crop) + if (param.paddingmode == DNN_PMODE_CROP_CENTER) { float resizeFactor = std::max(size.width / (float)imgSize.width, - size.height / (float)imgSize.height); + size.height / (float)imgSize.height); resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR); Rect crop(Point(0.5 * (images[i].cols - size.width), - 0.5 * (images[i].rows - size.height)), - size); + 0.5 * (images[i].rows - size.height)), + size); images[i] = images[i](crop); } else - resize(images[i], images[i], size, 0, 0, INTER_LINEAR); + { + if (param.paddingmode == DNN_PMODE_LETTERBOX) + { + float resizeFactor = std::min(size.width / (float)imgSize.width, + size.height / (float)imgSize.height); + int rh = int(imgSize.height * resizeFactor); + int rw = int(imgSize.width * resizeFactor); + /* fx and fy must be given explicitly so that INTER_LINEAR lands in the interpolation argument. */ resize(images[i], images[i], Size(rw, rh), 0, 0, INTER_LINEAR); + + int top = (size.height - rh)/2; + int bottom = size.height - top - rh; + int left = (size.width - rw)/2; + int right = size.width - left - rw; + copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT); + } + else + resize(images[i], images[i], size, 0, 0, INTER_LINEAR); + } } - if (images[i].depth() == CV_8U && ddepth == CV_32F) - images[i].convertTo(images[i], CV_32F); - Scalar mean = mean_; - if (swapRB) + Scalar mean = param.mean; + Scalar imgScale = scalefactor; /* per-image copy: swapping the shared scalefactor in place would flip it back on every second image of the batch */ + if (param.swapRB) + { std::swap(mean[0], mean[2]); + std::swap(imgScale[0], imgScale[2]); + } + + if (images[i].depth() == CV_8U && param.ddepth == CV_32F) + images[i].convertTo(images[i], CV_32F); images[i] -= mean; - images[i] *= scalefactor; + multiply(images[i], imgScale, images[i]); } size_t nimages = images.size(); Mat image0 = images[0]; - int nch = image0.channels(); CV_Assert(image0.dims == 2); - if (nch == 3 || nch == 4) + + if (param.datalayout == 
DNN_LAYOUT_NCHW) { - int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; - blob_.create(4, sz, ddepth); - Mat blob = blob_.getMat(); - Mat ch[4]; + if (nch == 3 || nch == 4) + { + int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; + blob_.create(4, sz, param.ddepth); + Mat blob = blob_.getMat(); + Mat ch[4]; - for (size_t i = 0; i < nimages; i++) + for (size_t i = 0; i < nimages; i++) + { + const Mat& image = images[i]; + CV_Assert(image.depth() == blob_.depth()); + nch = image.channels(); + CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); + CV_Assert(image.size() == image0.size()); + + for (int j = 0; j < nch; j++) + ch[j] = Mat(image.rows, image.cols, param.ddepth, blob.ptr((int)i, j)); + if (param.swapRB) + std::swap(ch[0], ch[2]); + split(image, ch); + } + } + else { - const Mat& image = images[i]; - CV_Assert(image.depth() == blob_.depth()); - nch = image.channels(); - CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); - CV_Assert(image.size() == image0.size()); + CV_Assert(nch == 1); + int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; + blob_.create(4, sz, param.ddepth); + Mat blob = blob_.getMat(); - for (int j = 0; j < nch; j++) - ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j)); - if (swapRB) - std::swap(ch[0], ch[2]); - split(image, ch); + for (size_t i = 0; i < nimages; i++) + { + const Mat& image = images[i]; + CV_Assert(image.depth() == blob_.depth()); + nch = image.channels(); + CV_Assert(image.dims == 2 && (nch == 1)); + CV_Assert(image.size() == image0.size()); + + image.copyTo(Mat(image.rows, image.cols, param.ddepth, blob.ptr((int)i, 0))); + } } } - else + else if (param.datalayout == DNN_LAYOUT_NHWC) { - CV_Assert(nch == 1); - int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; - blob_.create(4, sz, ddepth); + int sz[] = { (int)nimages, image0.rows, image0.cols, nch}; + blob_.create(4, sz, param.ddepth); Mat blob = blob_.getMat(); - + int subMatType = CV_MAKETYPE(param.ddepth, nch); for 
(size_t i = 0; i < nimages; i++) { const Mat& image = images[i]; CV_Assert(image.depth() == blob_.depth()); - nch = image.channels(); - CV_Assert(image.dims == 2 && (nch == 1)); + CV_Assert(image.channels() == image0.channels()); CV_Assert(image.size() == image0.size()); - - image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0))); + if (param.swapRB) + { + Mat tmpRB; + cvtColor(image, tmpRB, COLOR_BGR2RGB); + tmpRB.copyTo(Mat(tmpRB.rows, tmpRB.cols, subMatType, blob.ptr((int)i, 0))); + } + else + image.copyTo(Mat(image.rows, image.cols, subMatType, blob.ptr((int)i, 0))); } } + else + CV_Error(Error::StsUnsupportedFormat, "Unsupported data layout in blobFromImagesWithParams function."); } void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 44e70bac41..5863832119 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -64,16 +64,6 @@ static int toNCDHW(int idx) else return (5 + idx) % 4 + 1; } -// This values are used to indicate layer output's data layout where it's possible. 
-enum DataLayout -{ - DATA_LAYOUT_NHWC, - DATA_LAYOUT_NCHW, - DATA_LAYOUT_NDHWC, - DATA_LAYOUT_UNKNOWN, - DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d) -}; - typedef std::vector > StrIntVector; struct Pin @@ -276,15 +266,15 @@ static DataLayout getDataLayout(const tensorflow::NodeDef& layer) { std::string format = getLayerAttr(layer, "data_format").s(); if (format == "NHWC" || format == "channels_last") - return DATA_LAYOUT_NHWC; + return DNN_LAYOUT_NHWC; else if (format == "NCHW" || format == "channels_first") - return DATA_LAYOUT_NCHW; + return DNN_LAYOUT_NCHW; else if (format == "NDHWC") - return DATA_LAYOUT_NDHWC; + return DNN_LAYOUT_NDHWC; else CV_Error(Error::StsParseError, "Unknown data_format value: " + format); } - return DATA_LAYOUT_UNKNOWN; + return DNN_LAYOUT_UNKNOWN; } static inline std::string getNodeName(const std::string& tensorName) @@ -299,7 +289,7 @@ DataLayout getDataLayout( ) { std::map::const_iterator it = data_layouts.find(getNodeName(layerName)); - return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN; + return it != data_layouts.end() ? 
it->second : DNN_LAYOUT_UNKNOWN; } static @@ -325,11 +315,11 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) const tensorflow::AttrValue& val = getLayerAttr(layer, "strides"); int dimX, dimY, dimC, dimD; int layout = getDataLayout(layer); - if (layout == DATA_LAYOUT_NCHW) + if (layout == DNN_LAYOUT_NCHW) { dimC = 1; dimY = 2; dimX = 3; } - else if (layout == DATA_LAYOUT_NDHWC) + else if (layout == DNN_LAYOUT_NDHWC) { dimD = 1; dimY = 2; dimX = 3; dimC = 4; } @@ -340,7 +330,7 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) if (!(val.list().i_size() == 4 || val.list().i_size() == 5) || val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported strides"); - if (layout == DATA_LAYOUT_NDHWC) { + if (layout == DNN_LAYOUT_NDHWC) { int strides[] = {static_cast(val.list().i(dimD)), static_cast(val.list().i(dimY)), static_cast(val.list().i(dimX))}; @@ -375,11 +365,11 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer) const tensorflow::AttrValue& val = getLayerAttr(layer, "ksize"); int dimX, dimY, dimC, dimD; int layout = getDataLayout(layer); - if (layout == DATA_LAYOUT_NCHW) + if (layout == DNN_LAYOUT_NCHW) { dimC = 1; dimY = 2; dimX = 3; } - else if (layout == DATA_LAYOUT_NDHWC) + else if (layout == DNN_LAYOUT_NDHWC) { dimD = 1; dimY = 2; dimX = 3; dimC = 4; } @@ -391,7 +381,7 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer) val.list().i(0) != 1 || val.list().i(dimC) != 1) CV_Error(Error::StsError, "Unsupported ksize"); - if (layout == DATA_LAYOUT_NDHWC) { + if (layout == DNN_LAYOUT_NDHWC) { int kernel[] = {static_cast(val.list().i(dimD)), static_cast(val.list().i(dimY)), static_cast(val.list().i(dimX))}; @@ -438,7 +428,7 @@ bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &lay pads[i] = protoPads.list().i(i); } - if (getDataLayout(layer) != DATA_LAYOUT_NCHW) + if (getDataLayout(layer) != 
DNN_LAYOUT_NCHW) { CV_LOG_DEBUG(NULL, "DNN/TF: Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC."); // Perhaps, we have NHWC padding dimensions order. @@ -903,8 +893,8 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N connect(layer_id, dstNet, parsePin(input), id, 0); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN) - data_layouts[name] = DATA_LAYOUT_NHWC; + if (getDataLayout(name, data_layouts) == DNN_LAYOUT_UNKNOWN) + data_layouts[name] = DNN_LAYOUT_NHWC; } // "BiasAdd" "Add" "AddV2" "Sub" "AddN" @@ -1072,7 +1062,7 @@ void TFImporter::parseMatMul(tensorflow::GraphDef& net, const tensorflow::NodeDe // one input only int input_blob_index = kernel_blob_index == 0 ? 1 : 0; connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0); - data_layouts[name] = DATA_LAYOUT_PLANAR; + data_layouts[name] = DNN_LAYOUT_PLANAR; } void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) @@ -1100,7 +1090,7 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD bool changedType{false}; - if (inpLayout == DATA_LAYOUT_NHWC) + if (inpLayout == DNN_LAYOUT_NHWC) { if (newShapeSize >= 2 || newShape.at(1) == 1) { @@ -1108,11 +1098,11 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD addPermuteLayer(order, name + "/nhwc", inpId); if (newShapeSize < 4) { - inpLayout = DATA_LAYOUT_NCHW; + inpLayout = DNN_LAYOUT_NCHW; } else { - inpLayout = DATA_LAYOUT_NHWC; + inpLayout = DNN_LAYOUT_NHWC; changedType = newShapeSize == 4 && !hasSwap; } } @@ -1128,17 +1118,17 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD connect(layer_id, dstNet, inpId, id, 0); inpId = Pin(setName); - if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) && + if ((inpLayout == DNN_LAYOUT_NHWC || inpLayout == DNN_LAYOUT_UNKNOWN || 
inpLayout == DNN_LAYOUT_PLANAR) && newShapeSize == 4 && !hasSwap) { int order[] = {0, 3, 1, 2}; // Transform back to OpenCV's NCHW. setName = changedType ? name : name + "/nchw"; addPermuteLayer(order, setName, inpId); - inpLayout = DATA_LAYOUT_NCHW; + inpLayout = DNN_LAYOUT_NCHW; } - data_layouts[name] = newShapeSize == 2 ? DATA_LAYOUT_PLANAR : inpLayout; + data_layouts[name] = newShapeSize == 2 ? DNN_LAYOUT_PLANAR : inpLayout; } else { @@ -1206,13 +1196,13 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No addPermuteLayer(order, name + "/nhwc", inpId); // Convert shape From OpenCV's NCHW to NHWC. - if(inpLayout == DATA_LAYOUT_NHWC) + if(inpLayout == DNN_LAYOUT_NHWC) { std::swap(outShape[1], outShape[2]); std::swap(outShape[2], outShape[3]); } } - if(inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_NCHW) + if(inpLayout == DNN_LAYOUT_NHWC || inpLayout == DNN_LAYOUT_NCHW) { // toNCHW axis = (axis != 0)?(axis % outShapeSize + 1):0; @@ -1221,13 +1211,13 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No // After ExpendDims, 5-dim data will become 6-dim data, and OpenCV retains 6-dim data as original data layout. // Convert OpenCV's NCDHW to NDHWC first. - if (inpShape.size() == 5 && (inpLayout == DATA_LAYOUT_NDHWC || inpLayout == DATA_LAYOUT_UNKNOWN)) + if (inpShape.size() == 5 && (inpLayout == DNN_LAYOUT_NDHWC || inpLayout == DNN_LAYOUT_UNKNOWN)) { int order[] = {0, 2, 3, 4, 1}; // From OpenCV's NCDHW to NDHWC. addPermuteLayer(order, name + "/ndhwc", inpId, 5); // Convert shape From OpenCV's NCDHW to NDHWC. - if(inpLayout == DATA_LAYOUT_NDHWC) + if(inpLayout == DNN_LAYOUT_NDHWC) { std::swap(outShape[1], outShape[2]); std::swap(outShape[2], outShape[3]); @@ -1239,7 +1229,7 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No outShapeSize += 1; // From OpenCV's NCDHW to NDHWC. 
- if((inpLayout != DATA_LAYOUT_NHWC && inpLayout != DATA_LAYOUT_NCHW) && outShapeSize == 5) + if((inpLayout != DNN_LAYOUT_NHWC && inpLayout != DNN_LAYOUT_NCHW) && outShapeSize == 5) { for(int i = 1; i < outShapeSize - 1; i++) { @@ -1255,11 +1245,11 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No if(outShapeSize == 5) { - data_layouts[name] = DATA_LAYOUT_NDHWC; + data_layouts[name] = DNN_LAYOUT_NDHWC; } else if(outShapeSize == 4) { - data_layouts[name] = DATA_LAYOUT_NCHW; + data_layouts[name] = DNN_LAYOUT_NCHW; } else { @@ -1320,7 +1310,7 @@ void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeD layerParams.set("axis", start); layerParams.set("end_axis", end); } - if (inpLayout == DATA_LAYOUT_NHWC) + if (inpLayout == DNN_LAYOUT_NHWC) { LayerParams permLP; int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. @@ -1336,7 +1326,7 @@ void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeD int id = dstNet.addLayer(name, "Flatten", layerParams); layer_id[name] = id; connect(layer_id, dstNet, inpId, id, 0); - data_layouts[name] = DATA_LAYOUT_PLANAR; + data_layouts[name] = DNN_LAYOUT_PLANAR; } void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) @@ -1354,19 +1344,19 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod // keep NCHW layout this way. 
int inpLayout = getDataLayout(layer.input(0), data_layouts); std::string type = "Identity"; - if (inpLayout == DATA_LAYOUT_NHWC) + if (inpLayout == DNN_LAYOUT_NHWC) { if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2) { // in TensorFlow: NHWC->NCHW // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NCHW; + data_layouts[name] = DNN_LAYOUT_NCHW; } else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) { // in TensorFlow: NHWC->NHWC // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NHWC; + data_layouts[name] = DNN_LAYOUT_NHWC; } else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1) { @@ -1374,25 +1364,25 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod // in OpenCV: NCHW->NCWH int permData[] = {0, 1, 3, 2}; layerParams.set("order", DictValue::arrayInt(permData, perm.total())); - data_layouts[name] = DATA_LAYOUT_NCHW; // we keep track NCHW because channels position only matters + data_layouts[name] = DNN_LAYOUT_NCHW; // we keep track NCHW because channels position only matters type = "Permute"; } else CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); } - else if (inpLayout == DATA_LAYOUT_NCHW) + else if (inpLayout == DNN_LAYOUT_NCHW) { if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1) { // in TensorFlow: NCHW->NHWC // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NHWC; + data_layouts[name] = DNN_LAYOUT_NHWC; } else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) { // in TensorFlow: NCHW->NCHW // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NCHW; + data_layouts[name] = DNN_LAYOUT_NCHW; } else CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); @@ -1410,7 +1400,7 @@ void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::Nod // one input only 
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + data_layouts[name] = DNN_LAYOUT_UNKNOWN; } } @@ -1456,9 +1446,9 @@ void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDe int axisId = (type == "Concat" ? 0 : num_inputs - 1); int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC) axis = toNCHW(axis); - else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC) + else if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NDHWC) axis = toNCDHW(axis); layerParams.set("axis", axis); @@ -1585,7 +1575,7 @@ void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::N MatShape dims(shape.dim_size()); for (int i = 0; i < dims.size(); ++i) dims[i] = shape.dim(i).size(); - if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC) + if (dims.size() == 4 && predictedLayout == DNN_LAYOUT_NHWC) { std::swap(dims[1], dims[3]); // NHWC->NCWH std::swap(dims[2], dims[3]); // NCWH->NCHW @@ -1593,7 +1583,7 @@ void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::N dims[0] = 1; } - if (dims.size() == 5 && predictedLayout == DATA_LAYOUT_NDHWC) + if (dims.size() == 5 && predictedLayout == DNN_LAYOUT_NDHWC) { std::swap(dims[3], dims[4]); // NDHWC->NDHCW std::swap(dims[2], dims[3]); // NDHCW->NDCHW @@ -1624,7 +1614,7 @@ void TFImporter::parseSplit(tensorflow::GraphDef& net, const tensorflow::NodeDef // num_split // 1st blob is dims tensor int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC) axis = toNCHW(axis); layerParams.set("axis", axis); @@ -1654,7 +1644,7 @@ void TFImporter::parseSlice(tensorflow::GraphDef& net, const tensorflow::NodeDef CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); 
CV_CheckTypeEQ(sizes.type(), CV_32SC1, ""); - if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + if (begins.total() == 4 && getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC) { // Swap NHWC parameters' order to NCHW. std::swap(*begins.ptr(0, 2), *begins.ptr(0, 3)); @@ -1695,7 +1685,7 @@ void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow:: CV_Error(Error::StsNotImplemented, format("StridedSlice with stride %d", strides.at(i))); } - if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + if (begins.total() == 4 && getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC) { // Swap NHWC parameters' order to NCHW. std::swap(begins.at(2), begins.at(3)); @@ -2029,7 +2019,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso const int strideY = layerParams.get("stride_h"); const int strideX = layerParams.get("stride_w"); Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); - int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW); + int shift = (getDataLayout(layer) == DNN_LAYOUT_NCHW); const int outH = outShape.at(1 + shift) + begs[2] - ends[2]; const int outW = outShape.at(2 + shift) + begs[3] - ends[3]; if (layerParams.get("pad_mode") == "SAME") @@ -2141,7 +2131,7 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod // one input only connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + data_layouts[name] = DNN_LAYOUT_UNKNOWN; } // "ResizeNearestNeighbor" "ResizeBilinear" "FusedResizeAndPadConv2D" @@ -2239,7 +2229,7 @@ void TFImporter::parseL2Normalize(tensorflow::GraphDef& net, const tensorflow::N CV_Assert(reductionIndices.type() == CV_32SC1); const int numAxes = reductionIndices.total(); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + if (getDataLayout(name, data_layouts) == DNN_LAYOUT_NHWC) for (int i = 0; i < numAxes; ++i) 
reductionIndices.at(i) = toNCHW(reductionIndices.at(i)); @@ -2292,7 +2282,7 @@ void TFImporter::parsePriorBox(tensorflow::GraphDef& net, const tensorflow::Node layer_id[name] = id; connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + data_layouts[name] = DNN_LAYOUT_UNKNOWN; } void TFImporter::parseSoftmax(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) @@ -2417,7 +2407,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& if (!keepDims) { - if (layout == DATA_LAYOUT_NHWC) + if (layout == DNN_LAYOUT_NHWC) { LayerParams permLP; int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. @@ -2539,7 +2529,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); layer_id[flattenName] = flattenId; connect(layer_id, dstNet, Pin(poolingName), flattenId, 0); - data_layouts[name] = DATA_LAYOUT_PLANAR; + data_layouts[name] = DNN_LAYOUT_PLANAR; } } } @@ -2562,7 +2552,7 @@ void TFImporter::parsePack(tensorflow::GraphDef& net, const tensorflow::NodeDef& if (dim != 0) CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation."); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + data_layouts[name] = DNN_LAYOUT_UNKNOWN; CV_Assert(hasLayerAttr(layer, "N")); int num = (int)getLayerAttr(layer, "N").i(); @@ -2959,11 +2949,11 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map& cons } // If all inputs of specific layer have the same data layout we can say that -// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. +// this layer's output has this data layout too. Returns DNN_LAYOUT_UNKNOWN otherwise. 
DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) { DataLayout layout = getDataLayout(layer); - if (layout != DATA_LAYOUT_UNKNOWN) + if (layout != DNN_LAYOUT_UNKNOWN) { CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)"); return layout; @@ -2975,17 +2965,17 @@ DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) std::map::const_iterator it = data_layouts.find(getNodeName(layer.input(i))); if (it != data_layouts.end()) { - if (layout != DATA_LAYOUT_UNKNOWN) + if (layout != DNN_LAYOUT_UNKNOWN) { - if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN) - return DATA_LAYOUT_UNKNOWN; + if (it->second != layout && it->second != DNN_LAYOUT_UNKNOWN) + return DNN_LAYOUT_UNKNOWN; } else layout = it->second; } } - if (layout != DATA_LAYOUT_UNKNOWN) + if (layout != DNN_LAYOUT_UNKNOWN) { CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)"); return layout; @@ -3061,14 +3051,14 @@ void TFImporter::populateNet() std::map::iterator it = data_layouts.find(name); if (it != data_layouts.end()) { - if (layout != DATA_LAYOUT_UNKNOWN) + if (layout != DNN_LAYOUT_UNKNOWN) { - if (it->second == DATA_LAYOUT_UNKNOWN) + if (it->second == DNN_LAYOUT_UNKNOWN) it->second = layout; else if (it->second != layout) { - it->second = DATA_LAYOUT_UNKNOWN; - layout = DATA_LAYOUT_UNKNOWN; + it->second = DNN_LAYOUT_UNKNOWN; + layout = DNN_LAYOUT_UNKNOWN; } } else @@ -3084,12 +3074,12 @@ void TFImporter::populateNet() it = data_layouts.find(name); if (it != data_layouts.end()) { - if (layout != DATA_LAYOUT_UNKNOWN) + if (layout != DNN_LAYOUT_UNKNOWN) { - if (it->second == DATA_LAYOUT_UNKNOWN) + if (it->second == DNN_LAYOUT_UNKNOWN) it->second = layout; else if (it->second != layout) - it->second = DATA_LAYOUT_UNKNOWN; + it->second = DNN_LAYOUT_UNKNOWN; } } else diff 
--git a/modules/dnn/src/tflite/tflite_importer.cpp b/modules/dnn/src/tflite/tflite_importer.cpp index cc09ec14eb..771aff103d 100644 --- a/modules/dnn/src/tflite/tflite_importer.cpp +++ b/modules/dnn/src/tflite/tflite_importer.cpp @@ -22,18 +22,6 @@ CV__DNN_INLINE_NS_BEGIN using namespace opencv_tflite; -// This values are used to indicate layer output's data layout where it's possible. -// Approach is similar to TensorFlow importer but TFLite models do not have explicit -// layout field "data_format". So we consider that all 4D inputs are in NHWC data layout. -enum DataLayout -{ - DATA_LAYOUT_NHWC, - DATA_LAYOUT_NCHW, - DATA_LAYOUT_NDHWC, - DATA_LAYOUT_UNKNOWN, - DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d) -}; - class TFLiteImporter { public: TFLiteImporter(Net& net, const char* modelBuffer, size_t bufSize); @@ -139,10 +127,10 @@ DataLayout estimateLayout(const Tensor& t) const auto t_shape = t.shape(); CV_Assert(t_shape); switch (t_shape->size()) { - case 5: return DATA_LAYOUT_NDHWC; - case 4: return DATA_LAYOUT_NHWC; - case 2: return DATA_LAYOUT_PLANAR; - default: return DATA_LAYOUT_UNKNOWN; + case 5: return DNN_LAYOUT_NDHWC; + case 4: return DNN_LAYOUT_NHWC; + case 2: return DNN_LAYOUT_PLANAR; + default: return DNN_LAYOUT_UNKNOWN; } } @@ -161,7 +149,7 @@ void TFLiteImporter::populateNet() CV_Assert(opCodes); CV_Assert(modelTensors); - layouts.resize(modelTensors->size(), DATA_LAYOUT_UNKNOWN); + layouts.resize(modelTensors->size(), DNN_LAYOUT_UNKNOWN); size_t subgraph_inputs_size = subgraph_inputs->size(); std::vector inputsNames(subgraph_inputs_size); std::vector inputsShapes(subgraph_inputs_size); @@ -177,7 +165,7 @@ void TFLiteImporter::populateNet() // Keep info about origin inputs names and shapes inputsNames[i] = tensor->name()->str(); std::vector shape(tensor->shape()->begin(), tensor->shape()->end()); - if (layouts[idx] == DATA_LAYOUT_NHWC) { + if (layouts[idx] == DNN_LAYOUT_NHWC) { CV_CheckEQ(shape.size(), (size_t)4, ""); 
std::swap(shape[2], shape[3]); std::swap(shape[1], shape[2]); @@ -257,14 +245,14 @@ void TFLiteImporter::populateNet() // Predict output layout. Some layer-specific parsers may set them explicitly. // Otherwise, propagate input layout. - if (layouts[op_outputs->Get(0)] == DATA_LAYOUT_UNKNOWN) { - DataLayout predictedLayout = DATA_LAYOUT_UNKNOWN; + if (layouts[op_outputs->Get(0)] == DNN_LAYOUT_UNKNOWN) { + DataLayout predictedLayout = DNN_LAYOUT_UNKNOWN; for (auto layout : inpLayouts) { - if (layout != DATA_LAYOUT_UNKNOWN) { - if (predictedLayout == DATA_LAYOUT_UNKNOWN) + if (layout != DNN_LAYOUT_UNKNOWN) { + if (predictedLayout == DNN_LAYOUT_UNKNOWN) predictedLayout = layout; else if (predictedLayout != layout) { - predictedLayout = DATA_LAYOUT_UNKNOWN; + predictedLayout = DNN_LAYOUT_UNKNOWN; break; } } @@ -491,11 +479,11 @@ void TFLiteImporter::parseUnpooling(const Operator& op, const std::string& opcod void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) { DataLayout inpLayout = layouts[op.inputs()->Get(0)]; - if (inpLayout == DATA_LAYOUT_NHWC) { + if (inpLayout == DNN_LAYOUT_NHWC) { // Permute to NCHW int permId = addPermuteLayer({0, 2, 3, 1}, layerParams.name + "/permute", layerIds[op.inputs()->Get(0)]); // NCHW -> NHWC layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0); - layouts[op.outputs()->Get(0)] = DATA_LAYOUT_NCHW; + layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NCHW; } layerParams.type = "Reshape"; @@ -514,7 +502,7 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode, int axis = options->axis(); DataLayout inpLayout = layouts[op.inputs()->Get(0)]; - if (inpLayout == DATA_LAYOUT_NHWC) { + if (inpLayout == DNN_LAYOUT_NHWC) { // OpenCV works in NCHW data layout. So change the axis correspondingly. 
axis = normalize_axis(axis, 4); static const int remap[] = {0, 2, 3, 1}; diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 0fab7551a5..4ee3e013cb 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -63,6 +63,63 @@ TEST(imagesFromBlob, Regression) } } +TEST(blobFromImageWithParams_4ch, NHWC_scalar_scale) +{ + Mat img(10, 10, CV_8UC4, cv::Scalar(0,1,2,3)); + std::vector factorVec = {0.1, 0.2, 0.3, 0.4}; + + Scalar scalefactor(factorVec[0], factorVec[1], factorVec[2], factorVec[3]); + + Image2BlobParams param; + param.scalefactor = scalefactor; + param.datalayout = DNN_LAYOUT_NHWC; + Mat blob = dnn::blobFromImageWithParams(img, param); // [1, 10, 10, 4] + + float* blobPtr = blob.ptr(0); + std::vector targetVec = {(float )factorVec[0] * 0, (float )factorVec[1] * 1, (float )factorVec[2] * 2, (float )factorVec[3] * 3}; // Target Value. + for (int hi = 0; hi < 10; hi++) + { + for (int wi = 0; wi < 10; wi++) + { + float* hwPtr = blobPtr + hi * 10 * 4 + wi * 4; + + // Check equal + EXPECT_NEAR(hwPtr[0], targetVec[0], 1e-5); + EXPECT_NEAR(hwPtr[1], targetVec[1], 1e-5); + EXPECT_NEAR(hwPtr[2], targetVec[2], 1e-5); + EXPECT_NEAR(hwPtr[3], targetVec[3], 1e-5); + } + } +} + +TEST(blobFromImageWithParams_4ch, letter_box) +{ + Mat img(40, 20, CV_8UC4, cv::Scalar(0,1,2,3)); + + // Construct target mat. + Mat targetCh[4]; + // The letterbox will add zero at the left and right of output blob. + // After the letterbox, every row data would have same value showing as valVec. 
+ std::vector valVec = {0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0}; + Mat rowM(1, 20, CV_8UC1, valVec.data()); + + for(int i = 0; i < 4; i++) + { + targetCh[i] = rowM * i; + } + + Mat targetImg; + merge(targetCh, 4, targetImg); + Size targeSize(20, 20); + + Image2BlobParams param; + param.size = targeSize; + param.paddingmode = DNN_PMODE_LETTERBOX; + Mat blob = dnn::blobFromImageWithParams(img, param); + Mat targetBlob = dnn::blobFromImage(targetImg, 1.0, targeSize); // only convert data from uint8 to float32. + EXPECT_EQ(0, cvtest::norm(targetBlob, blob, NORM_INF)); +} + TEST(readNet, Regression) { Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"),