dnn: apply hint to ignore denormals processing

Branch: pull/21521/head
Author: Alexander Alekhin (3 years ago)
Parent: b1d484f827
Commit: 70b0274c8e
Changed files:
  modules/dnn/src/caffe/caffe_importer.cpp (4 changed lines)
  modules/dnn/src/darknet/darknet_importer.cpp (3 changed lines)
  modules/dnn/src/dnn.cpp (13 changed lines)
  modules/dnn/src/layers/convolution_layer.cpp (11 changed lines)
  modules/dnn/src/onnx/onnx_importer.cpp (4 changed lines)
  modules/dnn/src/tensorflow/tf_importer.cpp (3 changed lines)
  modules/dnn/src/torch/torch_importer.cpp (5 changed lines)
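The commit replaces per-layer FTZ/DAZ toggling (removed from convolution_layer.cpp below) with the RAII helper FPDenormalsIgnoreHintScope declared in opencv2/core/utils/fp_control_utils.hpp, applied at model import and at the Net entry points. As a rough mental model only, not the actual OpenCV implementation, such a guard can be sketched like this:

// Illustrative sketch only (hypothetical class name, not the OpenCV helper):
// an RAII guard that enables flush-to-zero / denormals-are-zero for the current
// thread and restores the previous MXCSR flags when it goes out of scope.
#include <xmmintrin.h>   // _MM_GET/SET_FLUSH_ZERO_MODE
#include <pmmintrin.h>   // _MM_GET/SET_DENORMALS_ZERO_MODE (SSE3)

class DenormalsIgnoreScopeSketch
{
public:
    DenormalsIgnoreScopeSketch()
        : ftz_(_MM_GET_FLUSH_ZERO_MODE()), daz_(_MM_GET_DENORMALS_ZERO_MODE())
    {
        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    }
    ~DenormalsIgnoreScopeSketch()
    {
        _MM_SET_FLUSH_ZERO_MODE(ftz_);
        _MM_SET_DENORMALS_ZERO_MODE(daz_);
    }
private:
    unsigned int ftz_, daz_;
};

Because the flags are restored in the destructor, the hint cannot leak past an early return or an exception, which the removed hand-written save/restore pairs could not guarantee.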

modules/dnn/src/caffe/caffe_importer.cpp
@@ -53,6 +53,8 @@
 #include "caffe_io.hpp"
 #endif
+#include <opencv2/core/utils/fp_control_utils.hpp>
 namespace cv {
 namespace dnn {
 CV__DNN_EXPERIMENTAL_NS_BEGIN
@@ -88,6 +90,8 @@ MatShape parseBlobShape(const caffe::BlobShape& _input_shape)
 class CaffeImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     caffe::NetParameter net;
     caffe::NetParameter netBinary;
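The importer files below all follow the same pattern as CaffeImporter above: the scope object is declared as the first data member, so it is constructed before the network members and destroyed after them, and the whole import (parsing plus weight conversion) runs under the denormals-ignore hint. A generic sketch, reusing the hypothetical guard from the sketch above rather than the real OpenCV class:

// Generic sketch, not an actual OpenCV importer:
class SomeImporterSketch
{
    DenormalsIgnoreScopeSketch fp_denormals_ignore_scope;  // first member: active for the object's whole lifetime
    // ... parsed network members would follow here ...
public:
    explicit SomeImporterSketch(const char* modelPath)
    {
        (void)modelPath;  // model parsing would happen here, with FTZ/DAZ already enabled
    }
};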

modules/dnn/src/darknet/darknet_importer.cpp
@@ -51,6 +51,7 @@
 #include "darknet_io.hpp"
+#include <opencv2/core/utils/fp_control_utils.hpp>
 namespace cv {
 namespace dnn {
@@ -61,6 +62,8 @@ namespace
 class DarknetImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     darknet::NetParameter net;
 public:

modules/dnn/src/dnn.cpp
@@ -55,6 +55,8 @@
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/imgproc.hpp>
+#include <opencv2/core/utils/fp_control_utils.hpp>
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/logger.hpp>
@@ -3504,6 +3506,9 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     CV_UNUSED(xml); CV_UNUSED(bin);
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
     InferenceEngine::CNNNetReader reader;
     reader.ReadNetwork(xml);
@@ -3540,6 +3545,8 @@ Net Net::readFromModelOptimizer(
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
     InferenceEngine::CNNNetReader reader;
@@ -3639,6 +3646,7 @@ Mat Net::forward(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     String layerName = outputName;
@@ -3660,6 +3668,7 @@ AsyncArray Net::forwardAsync(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 #ifdef CV_CXX11
     String layerName = outputName;
@@ -3691,6 +3700,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     String layerName = outputName;
@@ -3769,6 +3779,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs,
                   const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -3796,6 +3807,7 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
                   const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -3886,6 +3898,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     LayerPin pin;
     pin.lid = 0;
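
From the caller's point of view, the hint is now installed inside each Net entry point touched above (readFromModelOptimizer, forward, forwardAsync, setInput), so user code does not manage MXCSR flags at all. A usage sketch under assumed inputs (the model path, scale, and input size are placeholders, not part of this commit):

#include <opencv2/dnn.hpp>

cv::Mat runInference(const cv::Mat& img)
{
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model.onnx");  // placeholder model
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(224, 224));
    net.setInput(blob);    // denormals-ignore hint applied for the duration of this call
    return net.forward();  // and again for the forward pass; the caller's flags are restored on return
}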

modules/dnn/src/layers/convolution_layer.cpp
@@ -1629,13 +1629,6 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
-#if CV_SSE3
-        uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE();
-        uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE();
-        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
-#endif
         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
@@ -1748,10 +1741,6 @@ public:
             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
                               kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
         }
-#if CV_SSE3
-        _MM_SET_FLUSH_ZERO_MODE(ftzMode);
-        _MM_SET_DENORMALS_ZERO_MODE(dazMode);
-#endif
     }
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,

modules/dnn/src/onnx/onnx_importer.cpp
@@ -8,6 +8,8 @@
 #include "../precomp.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
+#include <opencv2/core/utils/fp_control_utils.hpp>
 #include <opencv2/core/utils/logger.defines.hpp>
 #undef CV_LOG_STRIP_LEVEL
 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
@@ -40,6 +42,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 class ONNXImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     opencv_onnx::ModelProto model_proto;
     struct LayerInfo {
         int layerId;

modules/dnn/src/tensorflow/tf_importer.cpp
@@ -11,6 +11,8 @@ Implementation of Tensorflow models parser
 #include "../precomp.hpp"
+#include <opencv2/core/utils/fp_control_utils.hpp>
 #include <opencv2/core/utils/logger.defines.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
 #undef CV_LOG_STRIP_LEVEL
@@ -509,6 +511,7 @@ void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int in
 class TFImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 public:
     TFImporter(Net& net, const char *model, const char *config = NULL);
     TFImporter(Net& net, const char *dataModel, size_t lenModel,

modules/dnn/src/torch/torch_importer.cpp
@@ -40,6 +40,9 @@
 //M*/
 #include "../precomp.hpp"
+#include <opencv2/core/utils/fp_control_utils.hpp>
 #include <limits>
 #include <set>
 #include <map>
@@ -106,6 +109,8 @@ static inline bool endsWith(const String &str, const char *substr)
 struct TorchImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
     typedef std::map<String, std::pair<int, Mat> > TensorsMap;
     Net net;
