From e41ba90f17d646bb6fc3e8acbc3332bc0423c817 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Fri, 21 Jul 2023 09:13:37 +0300
Subject: [PATCH] Merge pull request #24004 from dkurt:tflite_new_layers

[TFLite] Pack layer and other fixes for SSD from Keras #24004

### Pull Request Readiness Checklist

resolves https://github.com/opencv/opencv/issues/23992

**Merge with extra**: https://github.com/opencv/opencv_extra/pull/1076

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 modules/dnn/src/tflite/tflite_importer.cpp | 105 ++++++++++++++++++---
 modules/dnn/test/test_tflite_importer.cpp  |  20 +++-
 2 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/modules/dnn/src/tflite/tflite_importer.cpp b/modules/dnn/src/tflite/tflite_importer.cpp
index 4a186eaee0..7feded69ce 100644
--- a/modules/dnn/src/tflite/tflite_importer.cpp
+++ b/modules/dnn/src/tflite/tflite_importer.cpp
@@ -59,6 +59,7 @@ private:
     void parseUnpooling(const Operator& op, const std::string& opcode, LayerParams& layerParams);
     void parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams);
     void parseConcat(const Operator& op, const std::string& opcode, LayerParams& layerParams);
+    void parsePack(const Operator& op, const std::string& opcode, LayerParams& layerParams);
     void parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams);
     void parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams);
     void parseQuantize(const Operator& op, const std::string& opcode, LayerParams& layerParams);
@@ -70,6 +71,8 @@ private:
     void parseActivation(const Operator& op, const std::string& opcode, LayerParams& layerParams, bool isFused);
     void addLayer(LayerParams& layerParams, const Operator& op);
     int addPermuteLayer(const std::vector<int>& order, const std::string& permName, const std::pair<int, int>& inpId, int dtype);
+    int addReshapeLayer(const std::vector<int>& shape, int axis, int num_axes,
+                        const std::string& name, const std::pair<int, int>& inpId, int dtype);
     inline bool isInt8(const Operator& op);
     inline void getQuantParams(const Operator& op, float& inpScale, int& inpZero, float& outScale, int& outZero);
 };
@@ -267,6 +270,7 @@ TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap()
     dispatch["PAD"] = &TFLiteImporter::parsePadding;
     dispatch["RESHAPE"] = &TFLiteImporter::parseReshape;
     dispatch["CONCATENATION"] = &TFLiteImporter::parseConcat;
+    dispatch["PACK"] = &TFLiteImporter::parsePack;
     dispatch["RESIZE_BILINEAR"] = dispatch["RESIZE_NEAREST_NEIGHBOR"] = &TFLiteImporter::parseResize;
     dispatch["Convolution2DTransposeBias"] = &TFLiteImporter::parseDeconvolution;
     dispatch["QUANTIZE"] = &TFLiteImporter::parseQuantize;
@@ -596,16 +600,6 @@ void TFLiteImporter::parseUnpooling(const Operator& op, const std::string& opcod
 void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
     DataLayout inpLayout = layouts[op.inputs()->Get(0)];
-    if (inpLayout == DNN_LAYOUT_NHWC) {
-        // Permute to NCHW
-        std::vector<int> order = {0, 2, 3, 1};
-        const std::string name = layerParams.name + "/permute";
-        auto inpId = layerIds[op.inputs()->Get(0)];
-        int permId = addPermuteLayer(order, name, inpId, isInt8(op) ? CV_8S : CV_32F);  // NCHW -> NHWC
-        layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0);
-        layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NCHW;
-    }
-
     layerParams.type = "Reshape";
     std::vector<int> shape;
     if (op.inputs()->size() > 1) {
@@ -615,6 +609,22 @@ void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode,
         CV_Assert(options);
         shape.assign(options->new_shape()->begin(), options->new_shape()->end());
     }
+
+    if (inpLayout == DNN_LAYOUT_NHWC) {
+        if (shape.size() == 4) {
+            // Keep data but change a shape to OpenCV's NCHW order
+            std::swap(shape[2], shape[3]);
+            std::swap(shape[1], shape[2]);
+        } else {
+            // Permute to NCHW entire data and reshape to given a shape
+            std::vector<int> order = {0, 2, 3, 1};
+            const std::string name = layerParams.name + "/permute";
+            auto inpId = layerIds[op.inputs()->Get(0)];
+            int permId = addPermuteLayer(order, name, inpId, isInt8(op) ? CV_8S : CV_32F);  // NCHW -> NHWC
+            layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0);
+            layouts[op.outputs()->Get(0)] = DNN_LAYOUT_NCHW;
+        }
+    }
     layerParams.set("dim", DictValue::arrayInt(shape.data(), shape.size()));
     addLayer(layerParams, op);
 }
@@ -636,6 +646,47 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode,
     parseFusedActivation(op, options->fused_activation_function());
 }
 
+void TFLiteImporter::parsePack(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
+    auto options = reinterpret_cast<const PackOptions*>(op.builtin_options());
+    int axis = options->axis();
+
+    DataLayout inpLayout = layouts[op.inputs()->Get(0)];
+    if (inpLayout == DNN_LAYOUT_NHWC) {
+        // OpenCV works in NCHW data layout. So change the axis correspondingly.
+        axis = normalize_axis(axis, 5);  // 5 because Pack adds a new axis so -1 would mean 4
+        static const int remap[] = {0, 1, 3, 4, 2};
+        axis = remap[axis];
+    }
+
+    // Replace Pack layer to Reshape + Concat
+    // Use a set because there are models which replicate single layer data by Pack.
+    std::set<int> op_inputs(op.inputs()->begin(), op.inputs()->end());
+    std::map<int, std::pair<int, int> > originLayerIds;
+    for (int inp : op_inputs) {
+        auto inpId = layerIds[inp];
+        int dims = modelTensors->Get(inp)->shape()->size();
+
+        std::vector<int> shape{1, -1};
+        if (axis == dims) {
+            std::swap(shape[0], shape[1]);
+        }
+        const auto name = modelTensors->Get(inp)->name()->str() + "/reshape";
+        int reshapeId = addReshapeLayer(shape, axis == dims ? dims - 1 : axis, 1,
+                                        name, inpId, isInt8(op) ? CV_8S : CV_32F);
+
+        originLayerIds[inp] = layerIds[inp];
+        layerIds[inp] = std::make_pair(reshapeId, 0);
+    }
+    layerParams.type = "Concat";
+    layerParams.set("axis", axis);
+    addLayer(layerParams, op);
+
+    // Restore origin layer inputs
+    for (const auto& ids : originLayerIds) {
+        layerIds[ids.first] = ids.second;
+    }
+}
+
 void TFLiteImporter::parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
     layerParams.type = "Resize";
@@ -666,6 +717,18 @@ int TFLiteImporter::addPermuteLayer(const std::vector<int>& order, const std::st
     return permId;
 }
 
+int TFLiteImporter::addReshapeLayer(const std::vector<int>& shape, int axis, int num_axes,
+                                    const std::string& name, const std::pair<int, int>& inpId, int dtype)
+{
+    LayerParams lp;
+    lp.set("axis", axis);
+    lp.set("dim", DictValue::arrayInt(shape.data(), shape.size()));
+    lp.set("num_axes", num_axes);
+    int id = dstNet.addLayer(name, "Reshape", dtype, lp);
+    dstNet.connect(inpId.first, inpId.second, id, 0);
+    return id;
+}
+
 void TFLiteImporter::parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) {
     layerParams.type = "Deconvolution";
@@ -771,6 +834,8 @@ void TFLiteImporter::parseDetectionPostProcess(const Operator& op, const std::st
         parameters[keys[i]] = *reinterpret_cast<const uint32_t*>(data + offset + i * 4);
     }
 
+    parameters["num_classes"] = modelTensors->Get(op.inputs()->Get(1))->shape()->Get(2);
+
     layerParams.type = "DetectionOutput";
     layerParams.set("num_classes", parameters["num_classes"]);
     layerParams.set("share_location", true);
@@ -780,7 +845,6 @@ void TFLiteImporter::parseDetectionPostProcess(const Operator& op, const std::st
     layerParams.set("top_k", parameters["max_detections"]);
     layerParams.set("keep_top_k", parameters["max_detections"]);
     layerParams.set("code_type", "CENTER_SIZE");
-    layerParams.set("variance_encoded_in_target", true);
     layerParams.set("loc_pred_transposed", true);
 
     // Replace third input from tensor to Const layer with the priors
@@ -796,10 +860,27 @@ void TFLiteImporter::parseDetectionPostProcess(const Operator& op, const std::st
     priors.col(2) = priors.col(0) + priors.col(3);
     priors.col(3) = priors.col(1) + tmp;
 
+    float x_scale = *(float*)&parameters["x_scale"];
+    float y_scale = *(float*)&parameters["y_scale"];
+    float w_scale = *(float*)&parameters["w_scale"];
+    float h_scale = *(float*)&parameters["h_scale"];
+    if (x_scale != 1.0f || y_scale != 1.0f || w_scale != 1.0f || h_scale != 1.0f) {
+        int numPriors = priors.rows;
+        priors.resize(numPriors * 2);
+        Mat_<float> scales({1, 4}, {1.f / x_scale, 1.f / y_scale,
+                                    1.f / w_scale, 1.f / h_scale});
+        repeat(scales, numPriors, 1, priors.rowRange(numPriors, priors.rows));
+        priors = priors.reshape(1, {1, 2, (int)priors.total() / 2});
+        layerParams.set("variance_encoded_in_target", false);
+    } else {
+        priors = priors.reshape(1, {1, 1, (int)priors.total()});
+        layerParams.set("variance_encoded_in_target", true);
+    }
+
     LayerParams priorsLP;
     priorsLP.name = layerParams.name + "/priors";
     priorsLP.type = "Const";
-    priorsLP.blobs.resize(1, priors.reshape(1, {1, 1, (int)priors.total()}));
+    priorsLP.blobs.resize(1, priors);
     int priorsId = dstNet.addLayer(priorsLP.name, priorsLP.type, priorsLP);
 
     layerIds[op.inputs()->Get(2)] = std::make_pair(priorsId, 0);
diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp
index 5a1742ed97..c5bee0c086 100644
--- a/modules/dnn/test/test_tflite_importer.cpp
+++ b/modules/dnn/test/test_tflite_importer.cpp
@@ -31,9 +31,8 @@ void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
     }
 }
 
-void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
+void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
 {
-    Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
     testInputShapes(net, {input});
     net.setInput(input);
@@ -49,6 +48,12 @@ void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5,
     }
 }
 
+void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
+{
+    Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
+    testModel(net, modelName, input, l1, lInf);
+}
+
 void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4)
 {
     Mat input = imread(findDataFile("cv/shared/lena.png"));
@@ -56,6 +61,13 @@ void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e
     testModel(modelName, input, l1, lInf);
 }
 
+void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-4)
+{
+    Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy"));
+    Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite"));
+    testModel(net, modelName, inp, l1, lInf);
+}
+
 // https://google.github.io/mediapipe/solutions/face_mesh
 TEST(Test_TFLite, face_landmark)
 {
@@ -146,6 +158,10 @@ TEST(Test_TFLite, EfficientDet_int8) {
     normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
 }
 
+TEST(Test_TFLite, replicate_by_pack) {
+    testLayer("replicate_by_pack");
+}
+
 }} // namespace
 
 #endif // OPENCV_TEST_DNN_TFLITE
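To illustrate what this change enables on the user side, here is a minimal usage sketch: loading a Keras-exported TFLite SSD with OpenCV DNN and reading the `DetectionOutput`-style result produced by the imported detection post-processing op. The model and image paths, the 320x320 input size, the [0, 1] input scaling, and the 0.5 score threshold are assumptions for illustration only, not values taken from the patch; the 1x1xNx7 output layout with normalized coordinates is OpenCV's standard `DetectionOutput` convention.

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main()
{
    // Hypothetical SSD exported from Keras to TFLite (the use case of issue #23992).
    cv::dnn::Net net = cv::dnn::readNet("ssd_keras.tflite");

    cv::Mat img = cv::imread("input.jpg");
    // Assumed preprocessing: RGB input scaled to [0, 1] at 320x320; check your model's metadata.
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(320, 320),
                                          cv::Scalar(), /*swapRB=*/true);
    net.setInput(blob);

    // The detection post-processing op is imported as DetectionOutput, so the result is
    // a 1x1xNx7 blob: [batchId, classId, score, left, top, right, bottom],
    // with box coordinates normalized to [0, 1].
    cv::Mat out = net.forward();
    cv::Mat dets(out.size[2], out.size[3], CV_32F, out.ptr<float>());
    for (int i = 0; i < dets.rows; ++i)
    {
        float score = dets.at<float>(i, 2);
        if (score < 0.5f)  // assumed confidence threshold
            continue;
        std::cout << "class " << int(dets.at<float>(i, 1)) << " score " << score
                  << " box " << dets.at<float>(i, 3) << " " << dets.at<float>(i, 4)
                  << " " << dets.at<float>(i, 5) << " " << dets.at<float>(i, 6) << std::endl;
    }
    return 0;
}
```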