From 7175f257b513fd1e45d108dda2d1f56575d839e8 Mon Sep 17 00:00:00 2001 From: David <34099314+dmonterom@users.noreply.github.com> Date: Thu, 7 Jun 2018 15:29:04 +0200 Subject: [PATCH] Added ResizeBilinear op for tf (#11050) * Added ResizeBilinear op for tf Combined ResizeNearestNeighbor and ResizeBilinear layers into Resize (with an interpolation param). Minor changes to tf_importer and resize layer to save some code lines Minor changes in init.cpp Minor changes in tf_importer.cpp * Replaced implementation of a custom ResizeBilinear layer to all layers * Use Mat::ptr. Replace interpolation flags --- .../dnn/include/opencv2/dnn/all_layers.hpp | 8 +- modules/dnn/perf/perf_net.cpp | 8 + modules/dnn/src/darknet/darknet_io.cpp | 3 +- modules/dnn/src/init.cpp | 2 +- .../dnn/src/layers/crop_and_resize_layer.cpp | 2 +- modules/dnn/src/layers/resize_layer.cpp | 176 +++++++++++++++++ .../layers/resize_nearest_neighbor_layer.cpp | 117 ----------- modules/dnn/src/tensorflow/tf_importer.cpp | 30 ++- modules/dnn/test/test_tf_importer.cpp | 186 ++++-------------- samples/dnn/text_detection.cpp | 5 - 10 files changed, 253 insertions(+), 284 deletions(-) create mode 100644 modules/dnn/src/layers/resize_layer.cpp delete mode 100644 modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index cc8521586c..55b85a0b56 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -565,14 +565,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN }; /** - * @brief Resize input 4-dimensional blob by nearest neighbor strategy. + * @brief Resize input 4-dimensional blob by nearest neighbor or bilinear strategy. * - * Layer is used to support TensorFlow's resize_nearest_neighbor op. + * Layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops. */ - class CV_EXPORTS ResizeNearestNeighborLayer : public Layer + class CV_EXPORTS ResizeLayer : public Layer { public: - static Ptr create(const LayerParams& params); + static Ptr create(const LayerParams& params); }; class CV_EXPORTS ProposalLayer : public Layer diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index fff74df130..8e777f8ae5 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -236,6 +236,14 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3) processNet("dnn/yolov3.cfg", "dnn/yolov3.weights", "", inp / 255); } +PERF_TEST_P_(DNNTestNetwork, EAST_text_detection) +{ + if (backend == DNN_BACKEND_HALIDE || + backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) + throw SkipTestException(""); + processNet("dnn/frozen_east_text_detection.pb", "", "", Mat(cv::Size(320, 320), CV_32FC3)); +} + const tuple testCases[] = { #ifdef HAVE_HALIDE tuple(DNN_BACKEND_HALIDE, DNN_TARGET_CPU), diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 707cc29095..91ebb0fa8b 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -395,9 +395,10 @@ namespace cv { { cv::dnn::LayerParams param; param.name = "Upsample-name"; - param.type = "ResizeNearestNeighbor"; + param.type = "Resize"; param.set("zoom_factor", scaleFactor); + param.set("interpolation", "nearest"); darknet::LayerParameter lp; std::string layer_name = cv::format("upsample_%d", layer_id); diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 2bff16c4eb..e5c3a279e5 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -83,7 +83,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Concat, ConcatLayer); CV_DNN_REGISTER_LAYER_CLASS(Reshape, ReshapeLayer); CV_DNN_REGISTER_LAYER_CLASS(Flatten, FlattenLayer); - CV_DNN_REGISTER_LAYER_CLASS(ResizeNearestNeighbor, ResizeNearestNeighborLayer); + CV_DNN_REGISTER_LAYER_CLASS(Resize, ResizeLayer); CV_DNN_REGISTER_LAYER_CLASS(CropAndResize, CropAndResizeLayer); CV_DNN_REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer); diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index 3f92a8488d..a9bca1f04b 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -68,7 +68,7 @@ public: { float input_y = top * (inpHeight - 1) + y * heightScale; int y0 = static_cast(input_y); - const float* inpData_row0 = (float*)inp.data + y0 * inpWidth; + const float* inpData_row0 = inp.ptr(0, 0, y0); const float* inpData_row1 = (y0 + 1 < inpHeight) ? (inpData_row0 + inpWidth) : inpData_row0; for (int x = 0; x < outWidth; ++x) { diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp new file mode 100644 index 0000000000..82bc6542be --- /dev/null +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -0,0 +1,176 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2017, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +#include "../precomp.hpp" +#include "layers_common.hpp" +#include "../op_inf_engine.hpp" +#include + +namespace cv { namespace dnn { + +class ResizeLayerImpl CV_FINAL : public ResizeLayer +{ +public: + ResizeLayerImpl(const LayerParams& params) + { + setParamsFrom(params); + outWidth = params.get("width", 0); + outHeight = params.get("height", 0); + if (params.has("zoom_factor")) + { + CV_Assert(!params.has("zoom_factor_x") && !params.has("zoom_factor_y")); + zoomFactorWidth = zoomFactorHeight = params.get("zoom_factor"); + } + else if (params.has("zoom_factor_x") || params.has("zoom_factor_y")) + { + CV_Assert(params.has("zoom_factor_x") && params.has("zoom_factor_y")); + zoomFactorWidth = params.get("zoom_factor_x"); + zoomFactorHeight = params.get("zoom_factor_y"); + } + interpolation = params.get("interpolation"); + CV_Assert(interpolation == "nearest" || interpolation == "bilinear"); + + alignCorners = params.get("align_corners", false); + if (alignCorners) + CV_Error(Error::StsNotImplemented, "Resize with align_corners=true is not implemented"); + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() == 1, inputs[0].size() == 4); + outputs.resize(1, inputs[0]); + outputs[0][2] = outHeight > 0 ? outHeight : (outputs[0][2] * zoomFactorHeight); + outputs[0][3] = outWidth > 0 ? outWidth : (outputs[0][3] * zoomFactorWidth); + // We can work in-place (do nothing) if input shape == output shape. + return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]); + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && interpolation == "nearest"; + } + + virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE + { + if (!outWidth && !outHeight) + { + outHeight = outputs[0].size[2]; + outWidth = outputs[0].size[3]; + } + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); + } + + void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (outHeight == inputs[0]->size[2] && outWidth == inputs[0]->size[3]) + return; + + Mat& inp = *inputs[0]; + Mat& out = outputs[0]; + if (interpolation == "nearest") + { + for (size_t n = 0; n < inputs[0]->size[0]; ++n) + { + for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch) + { + resize(getPlane(inp, n, ch), getPlane(out, n, ch), + Size(outWidth, outHeight), 0, 0, INTER_NEAREST); + } + } + } + else if (interpolation == "bilinear") + { + const int inpHeight = inp.size[2]; + const int inpWidth = inp.size[3]; + const int inpSpatialSize = inpHeight * inpWidth; + const int outSpatialSize = outHeight * outWidth; + const float heightScale = static_cast(inpHeight) / (outHeight); + const float widthScale = static_cast(inpWidth) / (outWidth); + const int numPlanes = inp.size[0] * inp.size[1]; + CV_Assert(inp.isContinuous(), out.isContinuous()); + + Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight); + Mat outPlanes = out.reshape(1, numPlanes * outHeight); + for (int y = 0; y < outHeight; ++y) + { + float input_y = y * heightScale; + int y0 = static_cast(input_y); + const float* inpData_row0 = inpPlanes.ptr(y0); + const float* inpData_row1 = inpPlanes.ptr(std::min(y0 + 1, inpHeight - 1)); + for (int x = 0; x < outWidth; ++x) + { + float input_x = x * widthScale; + int x0 = static_cast(input_x); + int x1 = std::min(x0 + 1, inpWidth - 1); + + float* outData = outPlanes.ptr(y, x); + const float* inpData_row0_c = inpData_row0; + const float* inpData_row1_c = inpData_row1; + for (int c = 0; c < numPlanes; ++c) + { + *outData = inpData_row0_c[x0] + + (input_y - y0) * (inpData_row1_c[x0] - inpData_row0_c[x0]) + + (input_x - x0) * (inpData_row0_c[x1] - inpData_row0_c[x0] + + (input_y - y0) * (inpData_row1_c[x1] - inpData_row0_c[x1] - inpData_row1_c[x0] + inpData_row0_c[x0])); + + inpData_row0_c += inpSpatialSize; + inpData_row1_c += inpSpatialSize; + outData += outSpatialSize; + } + } + } + } + else + CV_Error(Error::StsNotImplemented, "Unknown interpolation: " + interpolation); + } + + virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE + { +#ifdef HAVE_INF_ENGINE + InferenceEngine::LayerParams lp; + lp.name = name; + lp.type = "Resample"; + lp.precision = InferenceEngine::Precision::FP32; + + std::shared_ptr ieLayer(new InferenceEngine::CNNLayer(lp)); + ieLayer->params["type"] = "caffe.ResampleParameter.NEAREST"; + ieLayer->params["antialias"] = "0"; + ieLayer->params["width"] = cv::format("%d", outWidth); + ieLayer->params["height"] = cv::format("%d", outHeight); + + return Ptr(new InfEngineBackendNode(ieLayer)); +#endif // HAVE_INF_ENGINE + return Ptr(); + } + +private: + int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight; + String interpolation; + bool alignCorners; +}; + + +Ptr ResizeLayer::create(const LayerParams& params) +{ + return Ptr(new ResizeLayerImpl(params)); +} + +} // namespace dnn +} // namespace cv diff --git a/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp b/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp deleted file mode 100644 index 703b7a438f..0000000000 --- a/modules/dnn/src/layers/resize_nearest_neighbor_layer.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -// Copyright (C) 2017, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -#include "../precomp.hpp" -#include "layers_common.hpp" -#include "../op_inf_engine.hpp" -#include - -namespace cv { namespace dnn { - -class ResizeNearestNeighborLayerImpl CV_FINAL : public ResizeNearestNeighborLayer -{ -public: - ResizeNearestNeighborLayerImpl(const LayerParams& params) - { - setParamsFrom(params); - CV_Assert(params.has("width") && params.has("height") || params.has("zoom_factor")); - CV_Assert(!params.has("width") && !params.has("height") || !params.has("zoom_factor")); - outWidth = params.get("width", 0); - outHeight = params.get("height", 0); - zoomFactor = params.get("zoom_factor", 1); - alignCorners = params.get("align_corners", false); - if (alignCorners) - CV_Error(Error::StsNotImplemented, "Nearest neighborhood resize with align_corners=true is not implemented"); - } - - bool getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const CV_OVERRIDE - { - CV_Assert(inputs.size() == 1, inputs[0].size() == 4); - outputs.resize(1, inputs[0]); - outputs[0][2] = outHeight > 0 ? outHeight : (outputs[0][2] * zoomFactor); - outputs[0][3] = outWidth > 0 ? outWidth : (outputs[0][3] * zoomFactor); - // We can work in-place (do nothing) if input shape == output shape. - return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]); - } - - virtual bool supportBackend(int backendId) CV_OVERRIDE - { - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine(); - } - - virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE - { - if (!outWidth && !outHeight) - { - outHeight = outputs[0].size[2]; - outWidth = outputs[0].size[3]; - } - } - - void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); - } - - void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - if (outHeight == inputs[0]->size[2] && outWidth == inputs[0]->size[3]) - return; - - Mat& inp = *inputs[0]; - Mat& out = outputs[0]; - for (size_t n = 0; n < inputs[0]->size[0]; ++n) - { - for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch) - { - resize(getPlane(inp, n, ch), getPlane(out, n, ch), - Size(outWidth, outHeight), 0, 0, INTER_NEAREST); - } - } - } - - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { -#ifdef HAVE_INF_ENGINE - InferenceEngine::LayerParams lp; - lp.name = name; - lp.type = "Resample"; - lp.precision = InferenceEngine::Precision::FP32; - - std::shared_ptr ieLayer(new InferenceEngine::CNNLayer(lp)); - ieLayer->params["type"] = "caffe.ResampleParameter.NEAREST"; - ieLayer->params["antialias"] = "0"; - ieLayer->params["width"] = cv::format("%d", outWidth); - ieLayer->params["height"] = cv::format("%d", outHeight); - - return Ptr(new InfEngineBackendNode(ieLayer)); -#endif // HAVE_INF_ENGINE - return Ptr(); - } - -private: - int outWidth, outHeight, zoomFactor; - bool alignCorners; -}; - - -Ptr ResizeNearestNeighborLayer::create(const LayerParams& params) -{ - return Ptr(new ResizeNearestNeighborLayerImpl(params)); -} - -} // namespace dnn -} // namespace cv diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index f19daf9cc6..4bff84175d 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1450,18 +1450,36 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); data_layouts[name] = DATA_LAYOUT_UNKNOWN; } - else if (type == "ResizeNearestNeighbor") + else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear") { - Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2); + if (layer.input_size() == 2) + { + Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(outSize.type() == CV_32SC1, outSize.total() == 2); + layerParams.set("height", outSize.at(0, 0)); + layerParams.set("width", outSize.at(0, 1)); + } + else if (layer.input_size() == 3) + { + Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_Assert(factorHeight.type() == CV_32SC1, factorHeight.total() == 1, + factorWidth.type() == CV_32SC1, factorWidth.total() == 1); + layerParams.set("zoom_factor_x", factorWidth.at(0)); + layerParams.set("zoom_factor_y", factorHeight.at(0)); + } + else + CV_Assert(layer.input_size() == 2 || layer.input_size() == 3); - layerParams.set("height", outSize.at(0, 0)); - layerParams.set("width", outSize.at(0, 1)); + if (type == "ResizeNearestNeighbor") + layerParams.set("interpolation", "nearest"); + else + layerParams.set("interpolation", "bilinear"); if (hasLayerAttr(layer, "align_corners")) layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b()); - int id = dstNet.addLayer(name, "ResizeNearestNeighbor", layerParams); + int id = dstNet.addLayer(name, "Resize", layerParams); layer_id[name] = id; connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 2690d7242b..4f024114ef 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -317,6 +317,43 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) normAssertDetections(ref, out, "", 0.9, 3.4e-3, 1e-2); } +// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png') +// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3) +// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'), +// sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')], +// feed_dict={'input_images:0': inp}) +// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2)) +// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2)) +// np.save('east_text_detection.scores.npy', scores) +// np.save('east_text_detection.geometry.npy', geometry) +TEST_P(Test_TensorFlow_nets, EAST_text_detection) +{ + std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false); + std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false); + std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false); + std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false); + + Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false)); + + net.setPreferableTarget(GetParam()); + + Mat img = imread(imgPath); + Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false); + net.setInput(inp); + + std::vector outs; + std::vector outNames(2); + outNames[0] = "feature_fusion/Conv_7/Sigmoid"; + outNames[1] = "feature_fusion/concat_3"; + net.forward(outs, outNames); + + Mat scores = outs[0]; + Mat geometry = outs[1]; + + normAssert(scores, blobFromNPY(refScoresPath), "scores"); + normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_nets, availableDnnTargets()); typedef testing::TestWithParam Test_TensorFlow_fp16; @@ -396,159 +433,10 @@ TEST(Test_TensorFlow, memory_read) runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true); } -// Test a custom layer. -class ResizeBilinearLayer CV_FINAL : public Layer -{ -public: - ResizeBilinearLayer(const LayerParams ¶ms) : Layer(params), - outWidth(0), outHeight(0), factorWidth(1), factorHeight(1) - { - CV_Assert(!params.get("align_corners", false)); - CV_Assert(!blobs.empty()); - - for (size_t i = 0; i < blobs.size(); ++i) - CV_Assert(blobs[i].type() == CV_32SC1); - - if (blobs.size() == 1) - { - CV_Assert(blobs[0].total() == 2); - outHeight = blobs[0].at(0, 0); - outWidth = blobs[0].at(0, 1); - } - else - { - CV_Assert(blobs.size() == 2, blobs[0].total() == 1, blobs[1].total() == 1); - factorHeight = blobs[0].at(0, 0); - factorWidth = blobs[1].at(0, 0); - outHeight = outWidth = 0; - } - } - - static Ptr create(LayerParams& params) - { - return Ptr(new ResizeBilinearLayer(params)); - } - - virtual bool getMemoryShapes(const std::vector > &inputs, - const int requiredOutputs, - std::vector > &outputs, - std::vector > &internals) const CV_OVERRIDE - { - std::vector outShape(4); - outShape[0] = inputs[0][0]; // batch size - outShape[1] = inputs[0][1]; // number of channels - outShape[2] = outHeight != 0 ? outHeight : (inputs[0][2] * factorHeight); - outShape[3] = outWidth != 0 ? outWidth : (inputs[0][3] * factorWidth); - outputs.assign(1, outShape); - return false; - } - - virtual void finalize(const std::vector& inputs, std::vector &outputs) CV_OVERRIDE - { - if (!outWidth && !outHeight) - { - outHeight = outputs[0].size[2]; - outWidth = outputs[0].size[3]; - } - } - - // This implementation is based on a reference implementation from - // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h - virtual void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) CV_OVERRIDE - { - Mat& inp = *inputs[0]; - Mat& out = outputs[0]; - const float* inpData = (float*)inp.data; - float* outData = (float*)out.data; - - const int batchSize = inp.size[0]; - const int numChannels = inp.size[1]; - const int inpHeight = inp.size[2]; - const int inpWidth = inp.size[3]; - - float heightScale = static_cast(inpHeight) / outHeight; - float widthScale = static_cast(inpWidth) / outWidth; - for (int b = 0; b < batchSize; ++b) - { - for (int y = 0; y < outHeight; ++y) - { - float input_y = y * heightScale; - int y0 = static_cast(std::floor(input_y)); - int y1 = std::min(y0 + 1, inpHeight - 1); - for (int x = 0; x < outWidth; ++x) - { - float input_x = x * widthScale; - int x0 = static_cast(std::floor(input_x)); - int x1 = std::min(x0 + 1, inpWidth - 1); - for (int c = 0; c < numChannels; ++c) - { - float interpolation = - inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) + - inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) + - inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) + - inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0); - outData[offset(out.size, c, x, y, b)] = interpolation; - } - } - } - } - } - - virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {} - -private: - static inline int offset(const MatSize& size, int c, int x, int y, int b) - { - return x + size[3] * (y + size[2] * (c + size[1] * b)); - } - - int outWidth, outHeight, factorWidth, factorHeight; -}; - TEST(Test_TensorFlow, resize_bilinear) { - CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); runTensorFlowNet("resize_bilinear"); runTensorFlowNet("resize_bilinear_factor"); - LayerFactory::unregisterLayer("ResizeBilinear"); -} - -// inp = cv.imread('opencv_extra/testdata/cv/ximgproc/sources/08.png') -// inp = inp[:,:,[2, 1, 0]].astype(np.float32).reshape(1, 512, 512, 3) -// outs = sess.run([sess.graph.get_tensor_by_name('feature_fusion/Conv_7/Sigmoid:0'), -// sess.graph.get_tensor_by_name('feature_fusion/concat_3:0')], -// feed_dict={'input_images:0': inp}) -// scores = np.ascontiguousarray(outs[0].transpose(0, 3, 1, 2)) -// geometry = np.ascontiguousarray(outs[1].transpose(0, 3, 1, 2)) -// np.save('east_text_detection.scores.npy', scores) -// np.save('east_text_detection.geometry.npy', geometry) -TEST(Test_TensorFlow, EAST_text_detection) -{ - CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); - std::string netPath = findDataFile("dnn/frozen_east_text_detection.pb", false); - std::string imgPath = findDataFile("cv/ximgproc/sources/08.png", false); - std::string refScoresPath = findDataFile("dnn/east_text_detection.scores.npy", false); - std::string refGeometryPath = findDataFile("dnn/east_text_detection.geometry.npy", false); - - Net net = readNet(findDataFile("dnn/frozen_east_text_detection.pb", false)); - net.setPreferableBackend(DNN_BACKEND_OPENCV); - - Mat img = imread(imgPath); - Mat inp = blobFromImage(img, 1.0, Size(), Scalar(123.68, 116.78, 103.94), true, false); - net.setInput(inp); - - std::vector outs; - std::vector outNames(2); - outNames[0] = "feature_fusion/Conv_7/Sigmoid"; - outNames[1] = "feature_fusion/concat_3"; - net.forward(outs, outNames); - - Mat scores = outs[0]; - Mat geometry = outs[1]; - - normAssert(scores, blobFromNPY(refScoresPath), "scores"); - normAssert(geometry, blobFromNPY(refGeometryPath), "geometry", 1e-4, 3e-3); - LayerFactory::unregisterLayer("ResizeBilinear"); } } diff --git a/samples/dnn/text_detection.cpp b/samples/dnn/text_detection.cpp index 5abe6b6884..f69d13f124 100644 --- a/samples/dnn/text_detection.cpp +++ b/samples/dnn/text_detection.cpp @@ -2,8 +2,6 @@ #include #include -#include "custom_layers.hpp" - using namespace cv; using namespace cv::dnn; @@ -38,9 +36,6 @@ int main(int argc, char** argv) CV_Assert(parser.has("model")); String model = parser.get("model"); - // Register a custom layer. - CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer); - // Load network. Net net = readNet(model);