Merge pull request #21506 from alalek:core_fp_denormals

pull/21525/head
Alexander Alekhin 3 years ago
commit 83ce1de8e7
  1. 29
      modules/core/include/opencv2/core/utils/fp_control.private.hpp
  2. 69
      modules/core/include/opencv2/core/utils/fp_control_utils.hpp
  3. 14
      modules/core/src/parallel.cpp
  4. 79
      modules/core/src/system.cpp
  5. 65
      modules/core/test/test_misc.cpp

@ -0,0 +1,29 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Private (implementation-side) companion of fp_control_utils.hpp.
// Selects which FP denormals control implementation is compiled and publishes the result as:
//   OPENCV_IMPL_FP_HINTS     - 1 if an implementation is selected, 0 otherwise
//   OPENCV_IMPL_FP_HINTS_X86 - 1 if the SSE intrinsics based implementation is selected, 0 otherwise
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
#include "fp_control_utils.hpp"
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT == 0
// disabled: the public header reports no support, so no implementation is selected
#elif defined(OPENCV_IMPL_FP_HINTS)
// custom: OPENCV_IMPL_FP_HINTS was defined before this header, its value is kept as-is
#elif defined(OPENCV_IMPL_FP_HINTS_X86)
// custom: OPENCV_IMPL_FP_HINTS_X86 was defined before this header, its value is kept as-is
// NOTE(review): in this case <xmmintrin.h> is not included here and
// OPENCV_IMPL_FP_HINTS falls back to 0 below - confirm this is intended
#elif defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
// SSE is enabled at compile time: use the intrinsics from xmmintrin.h
#include <xmmintrin.h>
#define OPENCV_IMPL_FP_HINTS_X86 1
#define OPENCV_IMPL_FP_HINTS 1
#endif
// Everything that was not selected above defaults to 0,
// so both macros are always defined and usable in plain #if expressions
#ifndef OPENCV_IMPL_FP_HINTS
#define OPENCV_IMPL_FP_HINTS 0
#endif
#ifndef OPENCV_IMPL_FP_HINTS_X86
#define OPENCV_IMPL_FP_HINTS_X86 0
#endif
#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP

@ -0,0 +1,69 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP
#define OPENCV_CORE_FP_CONTROL_UTILS_HPP
namespace cv {
namespace details {
// Opaque storage for a saved FP denormals mode.
// The content is implementation-defined and is filled by setFPDenormalsIgnoreHint() /
// saveFPDenormalsState() and consumed by restoreFPDenormalsState().
// The x86 implementation uses reserved[0] for the mask of controlled bits
// and reserved[1] for the saved values of these bits; other entries are not touched.
struct FPDenormalsModeState
{
uint32_t reserved[16]; // 64-bytes
}; // FPDenormalsModeState
// Enables (ignore=true) or disables (ignore=false) the "ignore denormals" FP hint.
// The previous values of the modified control bits are written into 'state',
// so they can be passed to restoreFPDenormalsState() later.
// Without a compiled implementation the call does nothing and 'state' is left untouched.
CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state);
// Stores the current FP denormals control bits into 'state' without changing them.
// Returns the number of leading 'state.reserved' entries which have been filled
// (2 for the x86 implementation), or 0 if there is no compiled implementation.
CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state);
// Applies the control bits stored in 'state' (only bits covered by the stored mask are changed).
// 'state' must have been filled by one of the functions above.
// Returns true if the state has been applied, false if there is no compiled implementation.
CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state);
// Scope guard for the FP denormals mode.
// The constructor changes the mode and remembers the previous one,
// the destructor puts the remembered mode back.
//
// Objects are intentionally non-copyable: a copy would carry the same saved state
// and restore it a second time from its own destructor (possibly after some other
// scope has already changed the mode).
class FPDenormalsIgnoreHintScope
{
public:
    // Enables (ignore=true) or disables (ignore=false) the "ignore denormals" hint
    // for the lifetime of this object.
    inline explicit FPDenormalsIgnoreHintScope(bool ignore = true)
    {
        details::setFPDenormalsIgnoreHint(ignore, saved_state);
    }

    // Applies a previously captured 'state' for the lifetime of this object
    // (e.g. to propagate the mode captured elsewhere).
    inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state)
    {
        // the current mode must be captured first, otherwise there is nothing to return to
        details::saveFPDenormalsState(saved_state);
        details::restoreFPDenormalsState(state);
    }

    // Puts back the mode which was active when the object was constructed.
    inline ~FPDenormalsIgnoreHintScope()
    {
        details::restoreFPDenormalsState(saved_state);
    }

protected:
    FPDenormalsModeState saved_state;  // mode to be restored by the destructor

private:
    FPDenormalsIgnoreHintScope(const FPDenormalsIgnoreHintScope&); // disabled
    FPDenormalsIgnoreHintScope& operator=(const FPDenormalsIgnoreHintScope&); // disabled
}; // FPDenormalsIgnoreHintScope
// Drop-in replacement of FPDenormalsIgnoreHintScope for targets without FP denormals control.
// It accepts the same constructor arguments and does nothing.
//
// Constructors are 'explicit' to mirror FPDenormalsIgnoreHintScope: both classes are
// published under the same alias, so an implicit conversion from bool / FPDenormalsModeState
// must not compile on one target and fail on another.
class FPDenormalsIgnoreHintScopeNOOP
{
public:
    // 'ignore' is accepted for interface compatibility only
    inline explicit FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); }
    // 'state' is accepted for interface compatibility only
    inline explicit FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); }
    inline ~FPDenormalsIgnoreHintScopeNOOP() { }
}; // FPDenormalsIgnoreHintScopeNOOP
} // namespace details
// Selects the public cv::FPDenormalsIgnoreHintScope alias and defines
// OPENCV_SUPPORTS_FP_DENORMALS_HINT (1 - real scope guard, 0 - no-op stub).
//
// Should depend on target compilation architecture only
// Note: previously added archs should NOT be removed to preserve ABI compatibility
#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT)
// preserve configuration overloading through ports:
// the externally provided value is used as-is, but the alias still has to be declared,
// otherwise cv::FPDenormalsIgnoreHintScope does not exist in such builds
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT
typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope;
#else
typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope;
#endif
#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_)
typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope;
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1
#else
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0
typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope;
#endif
} // namespace cv
#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP

@ -142,6 +142,9 @@
#include "opencv2/core/detail/exception_ptr.hpp" // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
#include <opencv2/core/utils/fp_control_utils.hpp>
#include <opencv2/core/utils/fp_control.private.hpp>
using namespace cv;
namespace cv {
@ -191,6 +194,9 @@ namespace {
// propagate main thread state
rng = cv::theRNG();
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
details::saveFPDenormalsState(fp_denormals_base_state);
#endif
#ifdef OPENCV_TRACE
traceRootRegion = CV_TRACE_NS::details::getCurrentRegion();
@ -271,6 +277,11 @@ namespace {
}
}
}
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
details::FPDenormalsModeState fp_denormals_base_state;
#endif
private:
ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled
ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled
@ -307,6 +318,9 @@ namespace {
// propagate main thread state
cv::theRNG() = ctx.rng;
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state);
#endif
cv::Range r;
cv::Range wholeRange = ctx.wholeRange;

@ -53,6 +53,9 @@
#include <opencv2/core/utils/tls.hpp>
#include <opencv2/core/utils/instrumentation.hpp>
#include <opencv2/core/utils/fp_control_utils.hpp>
#include <opencv2/core/utils/fp_control.private.hpp>
#ifndef OPENCV_WITH_THREAD_SANITIZER
#if defined(__clang__) && defined(__has_feature)
#if __has_feature(thread_sanitizer)
@ -2733,6 +2736,82 @@ void setUseIPP_NE(bool flag)
} // namespace ipp
namespace details {
// Fallback definitions of the "denormals are zero" constants for the x86 implementation.
// They are used by the functions below next to _MM_FLUSH_ZERO_*, but are only defined here
// if the included intrinsics headers did not provide them.
// ON and MASK share the same value because the flag is a single bit.
#if OPENCV_IMPL_FP_HINTS_X86
#ifndef _MM_DENORMALS_ZERO_ON // requires pmmintrin.h (SSE3)
#define _MM_DENORMALS_ZERO_ON 0x0040
#endif
#ifndef _MM_DENORMALS_ZERO_MASK // requires pmmintrin.h (SSE3)
#define _MM_DENORMALS_ZERO_MASK 0x0040
#endif
#endif
// Switches the "ignore denormals" FP hint on or off and reports the previous mode.
//
// x86 implementation: the flush-to-zero bit of MXCSR is always controlled,
// the denormals-are-zero bit is controlled only if SSE3 is reported as available.
//
// ignore - true to set the controlled bits, false to clear them
// state  - receives the mask of controlled bits (reserved[0]) and
//          their values before the call (reserved[1])
//
// Does nothing (and leaves 'state' untouched) if the x86 implementation is not compiled.
void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state)
{
#if OPENCV_IMPL_FP_HINTS_X86
    // which MXCSR bits are managed by this call, and the values requested for them
    unsigned controlled = _MM_FLUSH_ZERO_MASK;
    unsigned requested = 0;
    if (ignore)
        requested = _MM_FLUSH_ZERO_ON;
    if (featuresEnabled.have[CPU_SSE3])
    {
        controlled |= _MM_DENORMALS_ZERO_MASK;
        if (ignore)
            requested |= _MM_DENORMALS_ZERO_ON;
    }

    // replace the controlled bits only, all other MXCSR bits are carried over
    const unsigned csr_before = _mm_getcsr();
    const unsigned csr_after = (csr_before & ~controlled) | requested;
    CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", csr_after));

    // report what has to be put back later
    state.reserved[0] = (uint32_t)controlled;
    state.reserved[1] = (uint32_t)(csr_before & controlled);

    _mm_setcsr(csr_after);
#else
    CV_UNUSED(ignore);
    CV_UNUSED(state);
#endif
}
// Captures the current FP denormals mode without changing it.
//
// x86 implementation: stores the mask of controlled MXCSR bits into state.reserved[0]
// (flush-to-zero, plus denormals-are-zero if SSE3 is reported as available)
// and the current values of these bits into state.reserved[1].
//
// Returns the number of 'state.reserved' entries which have been filled:
// 2 for the x86 implementation, 0 if no implementation is compiled ('state' is untouched).
int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state)
{
#if OPENCV_IMPL_FP_HINTS_X86
    unsigned controlled = _MM_FLUSH_ZERO_MASK;
    if (featuresEnabled.have[CPU_SSE3])
        controlled |= _MM_DENORMALS_ZERO_MASK;

    const unsigned csr_now = _mm_getcsr();

    state.reserved[0] = (uint32_t)controlled;
    state.reserved[1] = (uint32_t)(csr_now & controlled);
    return 2;  // reserved[0] and reserved[1] are valid
#else
    CV_UNUSED(state);
    return 0;  // nothing has been stored
#endif
}
// Applies an FP denormals mode captured by setFPDenormalsIgnoreHint() / saveFPDenormalsState().
//
// x86 implementation: only MXCSR bits covered by the stored mask (state.reserved[0]) are
// replaced by the stored values (state.reserved[1]); all other MXCSR bits are carried over.
// Debug builds additionally verify that the mask is not empty and that
// the stored values do not contain bits outside of the mask.
//
// Returns true if the mode has been applied, false if no implementation is compiled.
bool restoreFPDenormalsState(const FPDenormalsModeState& state)
{
#if OPENCV_IMPL_FP_HINTS_X86
    const unsigned mask = (unsigned)state.reserved[0];
    const unsigned value = (unsigned)state.reserved[1];

    CV_DbgAssert(mask != 0); // invalid state (ensure that state is properly saved earlier)
    CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state");

    const unsigned csr_updated = (_mm_getcsr() & ~mask) | value;
    CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", csr_updated));
    _mm_setcsr(csr_updated);
    return true;
#else
    CV_UNUSED(state);
    return false;
#endif
}
} // namespace details
} // namespace cv
#ifdef HAVE_TEGRA_OPTIMIZATION

@ -3,6 +3,15 @@
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "opencv2/core/utils/logger.hpp"
#include <opencv2/core/utils/fp_control_utils.hpp>
#ifdef CV_CXX11
#include <chrono>
#include <thread>
#endif
namespace opencv_test { namespace {
TEST(Core_OutputArrayCreate, _1997)
@ -242,6 +251,62 @@ TEST(Core_Parallel, propagate_exceptions)
}, cv::Exception);
}
// Loop body which verifies that the FP denormals state observed inside parallel_for_()
// matches the state captured when this object was constructed.
// Any mismatch is logged, clears 'isOK' and the expected state is re-applied.
class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody
{
public:
    // Captures the reference state; members are initialized in declaration order,
    // so 'base_state' exists before it is filled by saveFPDenormalsState().
    FPDenormalsHintCheckerParallelLoopBody()
        : state_values_to_check(cv::details::saveFPDenormalsState(base_state))
        , isOK(true)
    {
    }

    ~FPDenormalsHintCheckerParallelLoopBody() {}

    void operator()(const cv::Range& r) const
    {
        CV_UNUSED(r);

        cv::details::FPDenormalsModeState worker_state;
        const int captured = cv::details::saveFPDenormalsState(worker_state);
        if (captured != 0)  // zero: FP state is not supported, no checks
        {
            for (int idx = 0; idx < state_values_to_check; ++idx)
            {
                if (base_state.reserved[idx] == worker_state.reserved[idx])
                    continue;

                CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x", idx, base_state.reserved[idx], worker_state.reserved[idx]));
                isOK = false;
                cv::details::restoreFPDenormalsState(base_state);
            }
        }

#ifdef CV_CXX11
        // keep the body busy for a while
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
#endif
    }

    cv::details::FPDenormalsModeState base_state;  // state captured by the constructor
    int state_values_to_check;                     // number of valid entries in base_state.reserved
    mutable bool isOK;                             // false if any invocation observed a mismatch
};
// Checks that the FP denormals mode selected before parallel_for_()
// is the mode observed by the loop body; the hint is toggled between cases
// (odd cases enable it, even cases disable it).
TEST(Core_Parallel, propagate_fp_denormals_ignore_hint)
{
    // three times more iterations than the reported number of threads
    const int nThreads = 3 * std::max(1, cv::getNumThreads());

    for (int caseId = 0; caseId < 4; ++caseId)
    {
        const bool enableHint = (caseId & 1) != 0;
        SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", caseId, enableHint ? "enable" : "disable"));

        FPDenormalsIgnoreHintScope fp_denormals_scope(enableHint);
        FPDenormalsHintCheckerParallelLoopBody job;  // captures the mode selected above
        ASSERT_NO_THROW({
            parallel_for_(cv::Range(0, nThreads), job);
        });
        EXPECT_TRUE(job.isOK);
    }
}
TEST(Core_Version, consistency)
{
// this test verifies that OpenCV version loaded in runtime

Loading…
Cancel
Save