Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Branch: pull/11316/head
Author: Alexander Alekhin, 7 years ago
Commit: 4b2d1aaeea
Changed files (45), with changed line counts:

 1. CMakeLists.txt (2)
 2. cmake/templates/cvconfig.h.in (10)
 3. doc/tutorials/gpu/gpu-basics-similarity/gpu_basics_similarity.markdown (4)
 4. modules/core/include/opencv2/core/base.hpp (1)
 5. modules/core/include/opencv2/core/check.hpp (135)
 6. modules/core/include/opencv2/core/hal/intrin_cpp.hpp (25)
 7. modules/core/include/opencv2/core/hal/intrin_neon.hpp (25)
 8. modules/core/include/opencv2/core/hal/intrin_sse.hpp (35)
 9. modules/core/include/opencv2/core/hal/intrin_vsx.hpp (131)
10. modules/core/include/opencv2/core/vsx_utils.hpp (110)
11. modules/core/src/check.cpp (160)
12. modules/core/src/ocl.cpp (50)
13. modules/core/src/system.cpp (30)
14. modules/core/test/test_intrin_utils.hpp (14)
15. modules/core/test/test_misc.cpp (499)
16. modules/dnn/include/opencv2/dnn/all_layers.hpp (2)
17. modules/dnn/include/opencv2/dnn/dnn.hpp (3)
18. modules/dnn/misc/quantize_face_detector.py (1)
19. modules/dnn/perf/perf_net.cpp (35)
20. modules/dnn/src/caffe/opencv-caffe.proto (6)
21. modules/dnn/src/dnn.cpp (48)
22. modules/dnn/src/layers/batch_norm_layer.cpp (18)
23. modules/dnn/src/layers/blank_layer.cpp (20)
24. modules/dnn/src/layers/convolution_layer.cpp (38)
25. modules/dnn/src/layers/fully_connected_layer.cpp (4)
26. modules/dnn/src/layers/layers_common.cpp (15)
27. modules/dnn/src/layers/normalize_bbox_layer.cpp (85)
28. modules/dnn/src/layers/scale_layer.cpp (19)
29. modules/dnn/src/layers/shift_layer.cpp (21)
30. modules/dnn/src/op_inf_engine.cpp (129)
31. modules/dnn/src/op_inf_engine.hpp (19)
32. modules/dnn/src/tensorflow/tf_graph_simplifier.cpp (24)
33. modules/dnn/src/tensorflow/tf_importer.cpp (61)
34. modules/dnn/test/test_backends.cpp (50)
35. modules/dnn/test/test_layers.cpp (38)
36. modules/dnn/test/test_precomp.hpp (2)
37. modules/dnn/test/test_tf_importer.cpp (7)
38. modules/imgproc/perf/opencl/perf_imgwarp.cpp (2)
39. modules/imgproc/perf/perf_warp.cpp (6)
40. modules/imgproc/src/deriv.cpp (2)
41. modules/imgproc/src/fixedpoint.inl.hpp (143)
42. modules/imgproc/src/imgwarp.cpp (975)
43. modules/imgproc/src/smooth.cpp (1060)
44. modules/imgproc/test/test_imgwarp_strict.cpp (51)
45. samples/dnn/face_detector/opencv_face_detector.pbtxt (1)

@@ -316,7 +316,7 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC )
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
-OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF (CV_GCC AND PPC64LE) )
+OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF ((CV_GCC OR CV_CLANG) AND PPC64LE) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )

@@ -46,13 +46,13 @@
/* Cocoa API */
#cmakedefine HAVE_COCOA
-/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
+/* NVIDIA CUDA Basic Linear Algebra Subprograms (BLAS) API*/
#cmakedefine HAVE_CUBLAS
-/* NVidia Cuda Runtime API*/
+/* NVIDIA CUDA Runtime API*/
#cmakedefine HAVE_CUDA
-/* NVidia Cuda Fast Fourier Transform (FFT) API*/
+/* NVIDIA CUDA Fast Fourier Transform (FFT) API*/
#cmakedefine HAVE_CUFFT
/* IEEE1394 capturing support */
@@ -124,10 +124,10 @@
/* Microsoft Media Foundation Capture library */
#cmakedefine HAVE_MSMF
-/* NVidia Video Decoding API*/
+/* NVIDIA Video Decoding API*/
#cmakedefine HAVE_NVCUVID
-/* NVidia Video Encoding API*/
+/* NVIDIA Video Encoding API*/
#cmakedefine HAVE_NVCUVENC
/* OpenCL Support */

@@ -8,7 +8,7 @@ Goal
In the @ref tutorial_video_input_psnr_ssim tutorial I already presented the PSNR and SSIM methods for checking
the similarity between the two images. And as you could see, the execution process takes quite some
time , especially in the case of the SSIM. However, if the performance numbers of an OpenCV
-implementation for the CPU do not satisfy you and you happen to have an NVidia CUDA GPU device in
+implementation for the CPU do not satisfy you and you happen to have an NVIDIA CUDA GPU device in
your system, all is not lost. You may try to port or write your owm algorithm for the video card.
This tutorial will give a good grasp on how to approach coding by using the GPU module of OpenCV. As
@@ -187,7 +187,7 @@ introduce asynchronous OpenCV GPU calls too with the help of the @ref cv::cuda::
Result and conclusion
---------------------
-On an Intel P8700 laptop CPU paired with a low end NVidia GT220M, here are the performance numbers:
+On an Intel P8700 laptop CPU paired with a low end NVIDIA GT220M, here are the performance numbers:
@code
Time of PSNR CPU (averaged for 10 runs): 41.4122 milliseconds. With result of: 19.2506
Time of PSNR GPU (averaged for 10 runs): 158.977 milliseconds. With result of: 19.2506

@@ -754,5 +754,6 @@ CV_EXPORTS_W void setUseIPP_NE(bool flag);
#include "opencv2/core/neon_utils.hpp"
#include "opencv2/core/vsx_utils.hpp"
+#include "opencv2/core/check.hpp"
#endif //OPENCV_CORE_BASE_HPP

@@ -0,0 +1,135 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_CORE_CHECK_HPP
#define OPENCV_CORE_CHECK_HPP
#include <opencv2/core/base.hpp>
namespace cv {
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
CV_EXPORTS const char* depthToString(int depth);
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
CV_EXPORTS const String typeToString(int type);
//! @cond IGNORED
namespace detail {
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
CV_EXPORTS const char* depthToString_(int depth);
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */
CV_EXPORTS const cv::String typeToString_(int type);
enum TestOp {
TEST_CUSTOM = 0,
TEST_EQ = 1,
TEST_NE = 2,
TEST_LE = 3,
TEST_LT = 4,
TEST_GE = 5,
TEST_GT = 6,
CV__LAST_TEST_OP
};
struct CheckContext {
const char* func;
const char* file;
int line;
enum TestOp testOp;
const char* message;
const char* p1_str;
const char* p2_str;
};
#ifndef CV__CHECK_FILENAME
# define CV__CHECK_FILENAME __FILE__
#endif
#ifndef CV__CHECK_FUNCTION
# if defined _MSC_VER
# define CV__CHECK_FUNCTION __FUNCSIG__
# elif defined __GNUC__
# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
# else
# define CV__CHECK_FUNCTION "<unknown>"
# endif
#endif
#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
{ CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
#define CV__TEST_NE(v1, v2) ((v1) != (v2))
#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
#define CV__TEST_LT(v1, v2) ((v1) < (v2))
#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
#define CV__TEST_GT(v1, v2) ((v1) > (v2))
#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
if(CV__TEST_##op((v1), (v2))) ; else { \
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
} \
} while (0)
#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
if(!!(test_expr)) ; else { \
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
} \
} while (0)
} // namespace
//! @endcond
/// Supported values of these types: int, float, double
#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
/// Check with additional "decoding" of type values in error message
#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
/// Check with additional "decoding" of depth values in error message
#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
/// Example: type == CV_8UC1 || type == CV_8UC3
#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
/// Example: depth == CV_32F || depth == CV_64F
#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
// TODO define pretty-printers: #define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
} // namespace
#endif // OPENCV_CORE_CHECK_HPP
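
Editor's note: a minimal usage sketch of the CV_Check* macros declared above (the helper function and values are illustrative, not part of the commit):

#include <opencv2/core.hpp>
#include <opencv2/core/check.hpp>

// Hypothetical validation helper, for illustration only.
void validate(const cv::Mat& m)
{
    // On failure the message decodes the type, e.g. "CV_8UC1" instead of a bare int.
    CV_CheckTypeEQ(m.type(), CV_8UC1, "Only 8-bit single-channel input is supported");
    // Custom-predicate form: the expression text is embedded in the error message.
    CV_CheckType(m.type(), m.type() == CV_8UC1 || m.type() == CV_8UC3,
                 "Only 8-bit 1- or 3-channel inputs are supported");
    CV_CheckGE(m.rows, 1, "Input must be non-empty");
}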

@@ -795,7 +795,7 @@ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>
/** @brief Multiply and add
Returns \f$ a*b + c \f$
-For floating point types only. */
+For floating point types and signed 32bit int only. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
                              const v_reg<_Tp, n>& c)
@@ -828,6 +828,29 @@ template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n
    return c;
}
+/** @brief Dot product of elements
+Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs.
+Scheme:
+@code
+  {A1 A2 ...} // 16-bit
+x {B1 B2 ...} // 16-bit
+-------------
+{A1B1+A2B2+C1 ...} // 32-bit
+@endcode
+Implemented only for 16-bit signed source type (v_int16x8).
+*/
+template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
+v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
+{
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, n/2> s;
+    for( int i = 0; i < (n/2); i++ )
+        s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
+    return s;
+}
/** @brief Multiply and expand
Multiply values two registers and store results in two registers with wider pack type.
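
Editor's note: a hedged usage sketch of the two universal-intrinsic additions in this commit (the accumulating v_dotprod overload and the signed 32-bit v_muladd); the demo function and values are illustrative, not from the commit:

#include <opencv2/core/hal/intrin.hpp>

void dotprod_accumulate_demo()
{
    short a16[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    short b16[8] = {1, 1, 1, 1, 1, 1, 1, 1};
    int   c32[4] = {10, 20, 30, 40};

    cv::v_int16x8 a = cv::v_load(a16), b = cv::v_load(b16);
    cv::v_int32x4 c = cv::v_load(c32);

    // Per 32-bit lane: a[2i]*b[2i] + a[2i+1]*b[2i+1] + c[i]  -> {13, 27, 41, 55}
    cv::v_int32x4 d = cv::v_dotprod(a, b, c);

    // New integer multiply-add: a*b + c per lane  -> {110, 420, 930, 1640}
    cv::v_int32x4 m = cv::v_muladd(c, c, c);
    (void)d; (void)m;
}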

@@ -506,6 +506,12 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
    return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
}
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    v_int32x4 s = v_dotprod(a, b);
+    return v_int32x4(vaddq_s32(s.val , c.val));
+}
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
@@ -730,6 +736,11 @@ inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_
    return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
}
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return v_int32x4(vmlaq_s32(c.val, a.val, b.val));
+}
#if CV_SIMD128_64F
inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
{
@@ -1095,6 +1106,18 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
#endif
+#if CV_SIMD128_64F
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    float32x4_t a_ = a.val;
+    int32x4_t result;
+    __asm__ ("fcvtns %0.4s, %1.4s"
+             : "=w"(result)
+             : "w"(a_)
+             : /* No clobbers */);
+    return v_int32x4(result);
+}
+#else
inline v_int32x4 v_round(const v_float32x4& a)
{
    static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
@@ -1103,7 +1126,7 @@ inline v_int32x4 v_round(const v_float32x4& a)
    int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
    return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
}
+#endif
inline v_int32x4 v_floor(const v_float32x4& a)
{
    int32x4_t a1 = vcvtq_s32_f32(a.val);
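
Editor's note (an observation, not from the commit message): the new AArch64 path uses fcvtns, which rounds to nearest with ties to even, while the fallback adds a sign-matched 0.5 and truncates, i.e. rounds ties away from zero. The two only disagree exactly at *.5 inputs; a scalar sketch:

#include <cmath>
#include <cstdio>

int main()
{
    float x = 2.5f;
    // Fallback-style rounding: add 0.5 carrying the sign of x, then truncate.
    int away = (int)(x + (x >= 0 ? 0.5f : -0.5f));    // 3
    // fcvtns-style rounding: nearest, ties to even (default FE_TONEAREST mode).
    int even = (int)std::nearbyint(x);                // 2
    std::printf("%d %d\n", away, even);
    return 0;
}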

@@ -710,6 +710,11 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
    return v_int32x4(_mm_madd_epi16(a.val, b.val));
}
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    return v_int32x4(_mm_add_epi32(_mm_madd_epi16(a.val, b.val), c.val));
+}
#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \
@@ -954,6 +959,10 @@ inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
    __m128i m = _mm_cmpgt_epi32(b.val, a.val);
    return v_uint32x4(_mm_sub_epi32(_mm_xor_si128(d, m), m));
}
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{
+    return a * b + c;
+}
#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
@@ -1632,7 +1641,7 @@ inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2&
    c = v_reinterpret_as_f64(t2);
}
-// 2-channel, float only
+// 2-channel
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
{
    const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
@@ -1644,7 +1653,29 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
    b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 ab b3
}
+inline void v_load_deinterleave(const short* ptr, v_int16x8& a, v_int16x8& b)
+{
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));     // a0 b0 a1 b1 a2 b2 a3 b3
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
+    __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
+    __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
+    __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
+    __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
+    a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 ab b3 b4 b5 b6 b7
+}
+inline void v_load_deinterleave(const ushort*ptr, v_uint16x8& a, v_uint16x8& b)
+{
+    v_int16x8 sa, sb;
+    v_load_deinterleave((const short*)ptr, sa, sb);
+    a = v_reinterpret_as_u16(sa);
+    b = v_reinterpret_as_u16(sb);
+}
-inline void v_store_interleave( short* ptr, const v_int16x8& a, const v_int16x8& b )
+inline void v_store_interleave(short* ptr, const v_int16x8& a, const v_int16x8& b)
{
    __m128i t0, t1;
    t0 = _mm_unpacklo_epi16(a.val, b.val);
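
Editor's note: a small sketch of the new 2-channel 16-bit deinterleave (illustrative values, not from the commit):

#include <opencv2/core/hal/intrin.hpp>

void deinterleave_demo()
{
    // Interleaved pairs (a_i, b_i); after the call: a = {0..7}, b = {100..107}.
    short packed[16] = {0,100, 1,101, 2,102, 3,103, 4,104, 5,105, 6,106, 7,107};
    cv::v_int16x8 a, b;
    cv::v_load_deinterleave(packed, a, b);
}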

@@ -1,46 +1,6 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
#ifndef OPENCV_HAL_VSX_HPP
#define OPENCV_HAL_VSX_HPP
@@ -276,34 +236,38 @@ OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
-#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \
inline _Tpvec v_load(const _Tp* ptr) \
-{ return _Tpvec(ld_func(0, ptr)); } \
+{ return _Tpvec(ld(0, ptr)); } \
-inline _Tpvec v_load_aligned(const _Tp* ptr) \
+inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \
-{ return _Tpvec(ld_func(0, ptr)); } \
+{ return _Tpvec(ld_a(0, ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vec_ld_l8(ptr)); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
-{ st_func(a.val, 0, ptr); } \
+{ st(a.val, 0, ptr); } \
-inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
+inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
-{ st_func(a.val, 0, ptr); } \
+{ st_a(a.val, 0, ptr); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vec_st_l8(a.val, ptr); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ vec_st_h8(a.val, ptr); }
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
//////////////// Value reordering /////////////// //////////////// Value reordering ///////////////
@@ -343,7 +307,7 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
    b1.val = fl(a.val); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
-{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
+{ return _Tpwvec(fh(vec_ld_l8(ptr))); }
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
@@ -353,10 +317,10 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu)
OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
-{ return v_uint32x4(vec_ld_buw(ptr)); }
+{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
inline v_int32x4 v_load_expand_q(const schar* ptr)
-{ return v_int32x4(vec_ld_bsw(ptr)); }
+{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
/* pack */
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
@@ -429,36 +393,6 @@ inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
    d.val = vec_mergesql(a.val, b.val);
}
-/* Extract */
-template<int s, typename _Tpvec>
-inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
-{
-    const int w = sizeof(typename _Tpvec::lane_type);
-    const int n = _Tpvec::nlanes;
-    const unsigned int sf = ((w * n) - (s * w));
-    if (s == 0)
-        return _Tpvec(a.val);
-    else if (sf > 15)
-        return _Tpvec();
-    // bitwise it just to make xlc happy
-    return _Tpvec(vec_sld(b.val, a.val, sf & 15));
-}
-#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \
-template<int s> \
-inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
-{ \
-    switch(s) { \
-    case 0: return _Tpvec(a.val); \
-    case 2: return _Tpvec(b.val); \
-    case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \
-    default: return _Tpvec(); \
-    } \
-}
-OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
-OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
////////// Arithmetic, bitwise and comparison operations /////////
/* Element-wise binary and unary operations */
@@ -669,6 +603,11 @@ OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
+/* Extract */
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{ return v_rotate_right<s>(a, b); }
////////// Reduce and mask /////////
/** Reduce **/
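
Editor's note: the replacement v_extract simply delegates to v_rotate_right; its semantics for a 4-lane type, sketched as a comment:

// v_extract<2>(A, B) over 4-lane vectors, equivalent to v_rotate_right<2>(A, B):
//   A = {a0 a1 a2 a3}, B = {b0 b1 b2 b3}
//   result = {a2 a3 b0 b1}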
@@ -821,6 +760,9 @@ inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
+{ return a * b + c; }
// TODO: exp, log, sin, cos
/** Absolute values **/
@@ -904,6 +846,9 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                            const v_float32x4& m1, const v_float32x4& m2,
                            const v_float32x4& m3)

@@ -1,46 +1,6 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
#ifndef OPENCV_HAL_VSX_UTILS_HPP
#define OPENCV_HAL_VSX_UTILS_HPP
@@ -64,106 +24,77 @@ typedef __vector unsigned char vec_uchar16;
#define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__}
#define vec_uchar16_sp(c) (__VSX_S16__(vec_uchar16, c))
#define vec_uchar16_c(v) ((vec_uchar16)(v))
-#define vec_uchar16_mx vec_uchar16_sp(0xFF)
-#define vec_uchar16_mn vec_uchar16_sp(0)
-#define vec_uchar16_z vec_uchar16_mn
+#define vec_uchar16_z vec_uchar16_sp(0)
typedef __vector signed char vec_char16;
#define vec_char16_set(...) (vec_char16){__VA_ARGS__}
#define vec_char16_sp(c) (__VSX_S16__(vec_char16, c))
#define vec_char16_c(v) ((vec_char16)(v))
-#define vec_char16_mx vec_char16_sp(0x7F)
-#define vec_char16_mn vec_char16_sp(-0x7F-1)
#define vec_char16_z vec_char16_sp(0)
typedef __vector unsigned short vec_ushort8;
#define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__}
#define vec_ushort8_sp(c) (__VSX_S8__(vec_ushort8, c))
#define vec_ushort8_c(v) ((vec_ushort8)(v))
-#define vec_ushort8_mx vec_ushort8_sp(0xFFFF)
-#define vec_ushort8_mn vec_ushort8_sp(0)
-#define vec_ushort8_z vec_ushort8_mn
+#define vec_ushort8_z vec_ushort8_sp(0)
typedef __vector signed short vec_short8;
#define vec_short8_set(...) (vec_short8){__VA_ARGS__}
#define vec_short8_sp(c) (__VSX_S8__(vec_short8, c))
#define vec_short8_c(v) ((vec_short8)(v))
-#define vec_short8_mx vec_short8_sp(0x7FFF)
-#define vec_short8_mn vec_short8_sp(-0x7FFF-1)
#define vec_short8_z vec_short8_sp(0)
typedef __vector unsigned int vec_uint4;
#define vec_uint4_set(...) (vec_uint4){__VA_ARGS__}
#define vec_uint4_sp(c) (__VSX_S4__(vec_uint4, c))
#define vec_uint4_c(v) ((vec_uint4)(v))
-#define vec_uint4_mx vec_uint4_sp(0xFFFFFFFFU)
-#define vec_uint4_mn vec_uint4_sp(0)
-#define vec_uint4_z vec_uint4_mn
+#define vec_uint4_z vec_uint4_sp(0)
typedef __vector signed int vec_int4;
#define vec_int4_set(...) (vec_int4){__VA_ARGS__}
#define vec_int4_sp(c) (__VSX_S4__(vec_int4, c))
#define vec_int4_c(v) ((vec_int4)(v))
-#define vec_int4_mx vec_int4_sp(0x7FFFFFFF)
-#define vec_int4_mn vec_int4_sp(-0x7FFFFFFF-1)
#define vec_int4_z vec_int4_sp(0)
typedef __vector float vec_float4;
#define vec_float4_set(...) (vec_float4){__VA_ARGS__}
#define vec_float4_sp(c) (__VSX_S4__(vec_float4, c))
#define vec_float4_c(v) ((vec_float4)(v))
-#define vec_float4_mx vec_float4_sp(3.40282347E+38F)
-#define vec_float4_mn vec_float4_sp(1.17549435E-38F)
#define vec_float4_z vec_float4_sp(0)
typedef __vector unsigned long long vec_udword2;
#define vec_udword2_set(...) (vec_udword2){__VA_ARGS__}
#define vec_udword2_sp(c) (__VSX_S2__(vec_udword2, c))
#define vec_udword2_c(v) ((vec_udword2)(v))
-#define vec_udword2_mx vec_udword2_sp(18446744073709551615ULL)
-#define vec_udword2_mn vec_udword2_sp(0)
-#define vec_udword2_z vec_udword2_mn
+#define vec_udword2_z vec_udword2_sp(0)
typedef __vector signed long long vec_dword2;
#define vec_dword2_set(...) (vec_dword2){__VA_ARGS__}
#define vec_dword2_sp(c) (__VSX_S2__(vec_dword2, c))
#define vec_dword2_c(v) ((vec_dword2)(v))
-#define vec_dword2_mx vec_dword2_sp(9223372036854775807LL)
-#define vec_dword2_mn vec_dword2_sp(-9223372036854775807LL-1)
#define vec_dword2_z vec_dword2_sp(0)
typedef __vector double vec_double2;
#define vec_double2_set(...) (vec_double2){__VA_ARGS__}
#define vec_double2_c(v) ((vec_double2)(v))
#define vec_double2_sp(c) (__VSX_S2__(vec_double2, c))
-#define vec_double2_mx vec_double2_sp(1.7976931348623157E+308)
-#define vec_double2_mn vec_double2_sp(2.2250738585072014E-308)
#define vec_double2_z vec_double2_sp(0)
#define vec_bchar16 __vector __bool char
#define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__}
#define vec_bchar16_c(v) ((vec_bchar16)(v))
-#define vec_bchar16_f (__VSX_S16__(vec_bchar16, 0))
-#define vec_bchar16_t (__VSX_S16__(vec_bchar16, 1))
#define vec_bshort8 __vector __bool short
#define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__}
#define vec_bshort8_c(v) ((vec_bshort8)(v))
-#define vec_bshort8_f (__VSX_S8__(vec_bshort8, 0))
-#define vec_bshort8_t (__VSX_S8__(vec_bshort8, 1))
#define vec_bint4 __vector __bool int
#define vec_bint4_set(...) (vec_bint4){__VA_ARGS__}
#define vec_bint4_c(v) ((vec_bint4)(v))
-#define vec_bint4_f (__VSX_S4__(vec_bint4, 0))
-#define vec_bint4_t (__VSX_S4__(vec_bint4, 1))
#define vec_bdword2 __vector __bool long long
#define vec_bdword2_set(...) (vec_bdword2){__VA_ARGS__}
#define vec_bdword2_c(v) ((vec_bdword2)(v))
-#define vec_bdword2_f (__VSX_S2__(vec_bdword2, 0))
-#define vec_bdword2_t (__VSX_S2__(vec_bdword2, 1))
#define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline))
@@ -688,34 +619,17 @@ VSX_IMPL_CONV_ODD_2_4(vec_uint4, vec_double2, vec_ctuo, vec_ctu)
{ vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); }
#endif
-// load 4 unsigned bytes into uint4 vector
-#define vec_ld_buw(p) vec_uint4_set((p)[0], (p)[1], (p)[2], (p)[3])
-// load 4 signed bytes into int4 vector
-#define vec_ld_bsw(p) vec_int4_set((p)[0], (p)[1], (p)[2], (p)[3])
-// load 4 unsigned bytes into float vector
-#define vec_ld_bps(p) vec_ctf(vec_ld_buw(p), 0)
// Store lower 8 byte
#define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0)
// Store higher 8 byte
#define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1)
-/*
- * vec_ld_l8(ptr) -> Load 64-bits of integer data to lower part
- * vec_ldz_l8(ptr) -> Load 64-bits of integer data to lower part and zero upper part
-**/
+// Load 64-bits of integer data to lower part
#define VSX_IMPL_LOAD_L8(Tvec, Tp) \
VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \
-{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } \
+{ return ((Tvec)vec_promote(*((uint64*)p), 0)); }
-VSX_FINLINE(Tvec) vec_ldz_l8(const Tp *p) \
-{ \
-    /* TODO: try (Tvec)(vec_udword2{*((uint64*)p), 0}) */ \
-    static const vec_bdword2 mask = {0xFFFFFFFFFFFFFFFF, 0x0000000000000000}; \
-    return vec_and(vec_ld_l8(p), (Tvec)mask); \
-}
VSX_IMPL_LOAD_L8(vec_uchar16, uchar)
VSX_IMPL_LOAD_L8(vec_char16, schar)
VSX_IMPL_LOAD_L8(vec_ushort8, ushort)
@@ -747,9 +661,9 @@ VSX_IMPL_LOAD_L8(vec_double2, double)
**/
#define VSX_IMPL_UNPACKU(rt, rg, zero) \
VSX_FINLINE(rt) vec_unpacklu(const rg& a) \
-{ return reinterpret_cast<rt>(vec_mergel(a, zero)); } \
+{ return (rt)(vec_mergel(a, zero)); } \
VSX_FINLINE(rt) vec_unpackhu(const rg& a) \
-{ return reinterpret_cast<rt>(vec_mergeh(a, zero)); }
+{ return (rt)(vec_mergeh(a, zero)); }
VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z)
VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z)

@@ -0,0 +1,160 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "opencv2/core/check.hpp"
namespace cv {
const char* depthToString(int depth)
{
const char* s = detail::depthToString_(depth);
return s ? s : "<invalid depth>";
}
const cv::String typeToString(int type)
{
cv::String s = detail::typeToString_(type);
if (s.empty())
{
static cv::String invalidType("<invalid type>");
return invalidType;
}
return s;
}
namespace detail {
static const char* getTestOpPhraseStr(unsigned testOp)
{
static const char* _names[] = { "{custom check}", "equal to", "not equal to", "less than or equal to", "less than", "greater than or equal to", "greater than" };
CV_DbgAssert(testOp < CV__LAST_TEST_OP);
return testOp < CV__LAST_TEST_OP ? _names[testOp] : "???";
}
static const char* getTestOpMath(unsigned testOp)
{
static const char* _names[] = { "???", "==", "!=", "<=", "<", ">=", ">" };
CV_DbgAssert(testOp < CV__LAST_TEST_OP);
return testOp < CV__LAST_TEST_OP ? _names[testOp] : "???";
}
const char* depthToString_(int depth)
{
static const char* depthNames[] = { "CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F", "CV_USRTYPE1" };
return depth <= CV_USRTYPE1 ? depthNames[depth] : NULL;
}
const cv::String typeToString_(int type)
{
int depth = CV_MAT_DEPTH(type);
int cn = CV_MAT_CN(type);
if (depth >= 0 && depth <= CV_USRTYPE1)
return cv::format("%sC%d", depthToString_(depth), cn);
return cv::String();
}
template<typename T> static CV_NORETURN
void check_failed_auto_(const T& v1, const T& v2, const CheckContext& ctx)
{
std::stringstream ss;
ss << ctx.message << " (expected: '" << ctx.p1_str << " " << getTestOpMath(ctx.testOp) << " " << ctx.p2_str << "'), where" << std::endl
<< " '" << ctx.p1_str << "' is " << v1 << std::endl;
if (ctx.testOp != TEST_CUSTOM && ctx.testOp < CV__LAST_TEST_OP)
{
ss << "must be " << getTestOpPhraseStr(ctx.testOp) << std::endl;
}
ss << " '" << ctx.p2_str << "' is " << v2;
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
}
void check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx)
{
std::stringstream ss;
ss << ctx.message << " (expected: '" << ctx.p1_str << " " << getTestOpMath(ctx.testOp) << " " << ctx.p2_str << "'), where" << std::endl
<< " '" << ctx.p1_str << "' is " << v1 << " (" << depthToString(v1) << ")" << std::endl;
if (ctx.testOp != TEST_CUSTOM && ctx.testOp < CV__LAST_TEST_OP)
{
ss << "must be " << getTestOpPhraseStr(ctx.testOp) << std::endl;
}
ss << " '" << ctx.p2_str << "' is " << v2 << " (" << depthToString(v2) << ")";
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
}
void check_failed_MatType(const int v1, const int v2, const CheckContext& ctx)
{
std::stringstream ss;
ss << ctx.message << " (expected: '" << ctx.p1_str << " " << getTestOpMath(ctx.testOp) << " " << ctx.p2_str << "'), where" << std::endl
<< " '" << ctx.p1_str << "' is " << v1 << " (" << typeToString(v1) << ")" << std::endl;
if (ctx.testOp != TEST_CUSTOM && ctx.testOp < CV__LAST_TEST_OP)
{
ss << "must be " << getTestOpPhraseStr(ctx.testOp) << std::endl;
}
ss << " '" << ctx.p2_str << "' is " << v2 << " (" << typeToString(v2) << ")";
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
}
void check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx)
{
check_failed_auto_<int>(v1, v2, ctx);
}
void check_failed_auto(const int v1, const int v2, const CheckContext& ctx)
{
check_failed_auto_<int>(v1, v2, ctx);
}
void check_failed_auto(const float v1, const float v2, const CheckContext& ctx)
{
check_failed_auto_<float>(v1, v2, ctx);
}
void check_failed_auto(const double v1, const double v2, const CheckContext& ctx)
{
check_failed_auto_<double>(v1, v2, ctx);
}
template<typename T> static CV_NORETURN
void check_failed_auto_(const T& v, const CheckContext& ctx)
{
std::stringstream ss;
ss << ctx.message << ":" << std::endl
<< " '" << ctx.p2_str << "'" << std::endl
<< "where" << std::endl
<< " '" << ctx.p1_str << "' is " << v;
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
}
void check_failed_MatDepth(const int v, const CheckContext& ctx)
{
std::stringstream ss;
ss << ctx.message << ":" << std::endl
<< " '" << ctx.p2_str << "'" << std::endl
<< "where" << std::endl
<< " '" << ctx.p1_str << "' is " << v << " (" << depthToString(v) << ")";
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
}
void check_failed_MatType(const int v, const CheckContext& ctx)
{
std::stringstream ss;
ss << ctx.message << ":" << std::endl
<< " '" << ctx.p2_str << "'" << std::endl
<< "where" << std::endl
<< " '" << ctx.p1_str << "' is " << v << " (" << typeToString(v) << ")";
cv::errorNoReturn(cv::Error::StsError, ss.str(), ctx.func, ctx.file, ctx.line);
}
void check_failed_MatChannels(const int v, const CheckContext& ctx)
{
check_failed_auto_<int>(v, ctx);
}
void check_failed_auto(const int v, const CheckContext& ctx)
{
check_failed_auto_<int>(v, ctx);
}
void check_failed_auto(const float v, const CheckContext& ctx)
{
check_failed_auto_<float>(v, ctx);
}
void check_failed_auto(const double v, const CheckContext& ctx)
{
check_failed_auto_<double>(v, ctx);
}
}} // namespace
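
Editor's note: a hedged sketch of what the helpers defined above return; the values follow directly from the depthNames table, but the program itself is illustrative, not from the commit:

#include <opencv2/core/check.hpp>
#include <iostream>

int main()
{
    std::cout << cv::depthToString(CV_16S) << std::endl; // "CV_16S"
    std::cout << cv::typeToString(CV_8UC3) << std::endl; // "CV_8UC3"
    std::cout << cv::depthToString(100)    << std::endl; // "<invalid depth>"
    return 0;
}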

@@ -4275,7 +4275,8 @@ public:
    entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
    Context& ctx = Context::getDefault();
    cl_int retval = CL_SUCCESS;
-   CV_OCL_CHECK_(entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval), retval);
+   entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
+   CV_OCL_CHECK_RESULT(retval, cv::format("clCreateBuffer(capacity=%lld) => %p", (long long int)entry.capacity_, (void*)entry.clBuffer_).c_str());
    CV_Assert(entry.clBuffer_ != NULL);
    if(retval == CL_SUCCESS)
    {
@@ -4669,15 +4670,19 @@
    {
        handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
                                u->size, u->origdata, &retval);
+       CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
+               (long long int)u->size, u->origdata, (void*)handle).c_str());
    }
    if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
    {
        handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
                                u->size, u->origdata, &retval);
+       CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
+               (long long int)u->size, u->origdata, (void*)handle).c_str());
        tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
    }
}
-CV_OCL_DBG_CHECK_RESULT(retval, "clCreateBuffer()");
+CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer() => %p", (void*)handle).c_str());
if(!handle || retval != CL_SUCCESS)
    return false;
u->handle = handle;
@@ -4805,13 +4810,14 @@ public:
    void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
                                    (CL_MAP_READ | CL_MAP_WRITE),
                                    0, u->size, 0, 0, 0, &retval);
-   CV_OCL_CHECK_RESULT(retval, "clEnqueueMapBuffer()");
+   CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, data).c_str());
    CV_Assert(u->origdata == data);
    if (u->originalUMatData)
    {
        CV_Assert(u->originalUMatData->data == data);
    }
-   CV_OCL_CHECK(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0));
+   retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
+   CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, data, (long long int)u->size).c_str());
    CV_OCL_DBG_CHECK(clFinish(q));
}
}
@@ -4838,7 +4844,8 @@ public:
    else
#endif
    {
-       CV_OCL_DBG_CHECK(clReleaseMemObject((cl_mem)u->handle));
+       cl_int retval = clReleaseMemObject((cl_mem)u->handle);
+       CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clReleaseMemObject(ptr=%p)", (void*)u->handle).c_str());
    }
    u->handle = 0;
    u->markDeviceCopyObsolete(true);
@@ -4955,7 +4962,7 @@ public:
    u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
                                         (CL_MAP_READ | CL_MAP_WRITE),
                                         0, u->size, 0, 0, 0, &retval);
-   CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(sz=%lld)", (int64)u->size).c_str());
+   CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clEnqueueMapBuffer(handle=%p, sz=%lld) => %p", (void*)u->handle, (long long int)u->size, u->data).c_str());
}
if (u->data && retval == CL_SUCCESS)
{
@@ -4982,8 +4989,10 @@ public:
#ifdef HAVE_OPENCL_SVM
    CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
#endif
-   CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
-           0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
+   cl_int retval = clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
+           0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
+   CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
+           (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
    u->markHostCopyObsolete(false);
}
}
@@ -5032,7 +5041,8 @@ public:
    if (u->refcount == 0)
    {
        CV_Assert(u->mapcount-- == 1);
-       CV_OCL_CHECK(retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0));
+       retval = clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0);
+       CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueUnmapMemObject(handle=%p, data=%p, [sz=%lld])", (void*)u->handle, u->data, (long long int)u->size).c_str());
        if (Device::getDefault().isAMD())
        {
            // required for multithreaded applications (see stitching test)
@@ -5050,8 +5060,10 @@ public:
#ifdef HAVE_OPENCL_SVM
    CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
#endif
-   CV_OCL_CHECK(retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
-           0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0));
+   retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
+           0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0);
+   CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, 0, sz=%lld, data=%p, 0, 0, 0)",
+           (void*)u->handle, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
    u->markDeviceCopyObsolete(false);
    u->markHostCopyObsolete(true);
}
@@ -5354,8 +5366,10 @@ public:
if( iscontinuous )
{
    AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
-   CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
-           dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
+   cl_int retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
+           dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0);
+   CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueWriteBuffer(q, handle=%p, CL_TRUE, offset=%lld, sz=%lld, data=%p, 0, 0, 0)",
+           (void*)u->handle, (long long int)dstrawofs, (long long int)u->size, alignedPtr.getAlignedPtr()).c_str());
}
else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
{
@@ -5527,8 +5541,10 @@ public:
{
    if( iscontinuous )
    {
-       CV_OCL_CHECK(retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
-               srcrawofs, dstrawofs, total, 0, 0, 0));
+       retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
+               srcrawofs, dstrawofs, total, 0, 0, 0);
+       CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueCopyBuffer(q, src=%p, dst=%p, src_offset=%lld, dst_offset=%lld, sz=%lld, 0, 0, 0)",
+               (void*)src->handle, (void*)dst->handle, (long long int)srcrawofs, (long long int)dstrawofs, (long long int)total).c_str());
    }
    else if (CV_OPENCL_DISABLE_BUFFER_RECT_OPERATIONS)
    {
@@ -6373,7 +6389,9 @@ struct Image2D::Impl
if (!alias && !src.isContinuous())
{
    devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
-   CV_OCL_CHECK_RESULT(err, "clCreateBuffer()");
+   CV_OCL_CHECK_RESULT(err, cv::format("clCreateBuffer(CL_MEM_READ_ONLY, sz=%lld) => %p",
+           (long long int)(src.cols * src.rows * src.elemSize()), (void*)devData
+       ).c_str());
    const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
    CV_OCL_CHECK(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,

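The hunks above all apply one pattern: the OpenCL call is separated from its status check so that CV_OCL_CHECK_RESULT can attach the call arguments to the error message instead of a bare macro name. A minimal sketch of the same pattern applied to a read (CV_OCL_CHECK_RESULT is OpenCV-internal; the function and variable names here are illustrative, not part of the patch):

static void readBufferChecked(cl_command_queue q, cl_mem handle, size_t sz, void* dst)
{
    // Capture the status first, then report it together with the arguments
    // that produced it -- this is what the diff above does for unmap/write/copy.
    cl_int retval = clEnqueueReadBuffer(q, handle, CL_TRUE, 0, sz, dst, 0, 0, 0);
    CV_OCL_CHECK_RESULT(retval, cv::format("clEnqueueReadBuffer(handle=%p, sz=%lld)",
                                           (void*)handle, (long long int)sz).c_str());
}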
@@ -249,10 +249,34 @@ const char* Exception::what() const throw() { return msg.c_str(); }

void Exception::formatMessage()
{
-    if( func.size() > 0 )
-        msg = format("OpenCV(%s) %s:%d: error: (%d) %s: %s in function %s\n", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), func.c_str());
-    else
-        msg = format("OpenCV(%s) %s:%d: error: (%d) %s: %s\n", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str());
+    size_t pos = err.find('\n');
+    bool multiline = pos != cv::String::npos;
+    if (multiline)
+    {
+        std::stringstream ss;
+        size_t prev_pos = 0;
+        while (pos != cv::String::npos)
+        {
+            ss << "> " << err.substr(prev_pos, pos - prev_pos) << std::endl;
+            prev_pos = pos + 1;
+            pos = err.find('\n', prev_pos);
+        }
+        ss << "> " << err.substr(prev_pos);
+        if (err[err.size() - 1] != '\n')
+            ss << std::endl;
+        err = ss.str();
+    }
+    if (func.size() > 0)
+    {
+        if (multiline)
+            msg = format("OpenCV(%s) %s:%d: error: (%d:%s) in function '%s'\n%s", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), func.c_str(), err.c_str());
+        else
+            msg = format("OpenCV(%s) %s:%d: error: (%d:%s) %s in function '%s'\n", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), func.c_str());
+    }
+    else
+    {
+        msg = format("OpenCV(%s) %s:%d: error: (%d:%s) %s%s", CV_VERSION, file.c_str(), line, code, cvErrorStr(code), err.c_str(), multiline ? "" : "\n");
+    }
}

static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL };

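The reworked formatter keeps a single-line error on one line (now rendered as "(code:text)"), while a multi-line err string is re-emitted with a "> " prefix per line and the function name pulled in front of the block. A sketch of the two resulting shapes, with illustrative version, file and line values:

// Single-line error (e.g. from CV_Assert(1 == 2)):
//   OpenCV(3.4.x) example.cpp:10: error: (-215:Assertion failed) 1 == 2 in function 'main'
//
// Multi-line error (e.g. from the CV_Check* macros tested below):
//   OpenCV(3.4.x) example.cpp:10: error: (-215:Assertion failed) in function 'main'
//   > Validation check failed (expected: 'value_1 == value_2'), where
//   > 'value_1' is 123
//   > must be equal to
//   > 'value_2' is 5678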
@@ -521,15 +521,25 @@ template<typename R> struct TheTest
    TheTest & test_dot_prod()
    {
        typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
+        typedef typename Rx2::lane_type w_type;

        Data<R> dataA, dataB(2);
        R a = dataA, b = dataB;

-        Data<Rx2> res = v_dotprod(a, b);
+        Data<Rx2> dataC;
+        dataC += std::numeric_limits<w_type>::is_signed ?
+                     std::numeric_limits<w_type>::min() :
+                     std::numeric_limits<w_type>::max() - R::nlanes * (dataB[0] + 1);
+        Rx2 c = dataC;
+
+        Data<Rx2> resD = v_dotprod(a, b),
+                  resE = v_dotprod(a, b, c);

        const int n = R::nlanes / 2;
        for (int i = 0; i < n; ++i)
        {
-            EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], res[i]);
+            EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], resD[i]);
+            EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1] + dataC[i], resE[i]);
        }
        return *this;
    }

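For reference, the contract being tested: v_dotprod multiplies adjacent pairs of narrow lanes and sums each pair into one double-width lane; the new three-argument overload adds a wide accumulator on top. A scalar sketch of that contract (not the SIMD implementation):

template <typename T, typename W>  // T: narrow lane type, W: widened lane type
static void dotprod_ref(const T* a, const T* b, const W* c, W* dst, int nlanes)
{
    // dst has nlanes/2 wide lanes; c may be NULL for the two-argument form.
    for (int i = 0; i < nlanes / 2; ++i)
        dst[i] = (W)a[2*i] * b[2*i] + (W)a[2*i + 1] * b[2*i + 1] + (c ? c[i] : (W)0);
}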
@@ -245,4 +245,503 @@ TEST(Core_Version, consistency)
    EXPECT_EQ(String(CV_VERSION), cv::getVersionString());
}
//
// Test core/check.hpp macros
//
void test_check_eq_1(int value_1, int value_2)
{
CV_CheckEQ(value_1, value_2, "Validation check failed");
}
TEST(Core_Check, testEQ_int_fail)
{
try
{
test_check_eq_1(123, 5678);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation check failed (expected: 'value_1 == value_2'), where\n"
"> 'value_1' is 123\n"
"> must be equal to\n"
"> 'value_2' is 5678\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testEQ_int_pass)
{
EXPECT_NO_THROW(
{
test_check_eq_1(1234, 1234);
});
}
void test_check_eq_2(float value_1, float value_2)
{
CV_CheckEQ(value_1, value_2, "Validation check failed (float)");
}
TEST(Core_Check, testEQ_float_fail)
{
try
{
test_check_eq_2(1234.5f, 1234.55f);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation check failed (float) (expected: 'value_1 == value_2'), where\n"
"> 'value_1' is 1234.5\n" // TODO Locale handling (use LC_ALL=C on Linux)
"> must be equal to\n"
"> 'value_2' is 1234.55\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testEQ_float_pass)
{
EXPECT_NO_THROW(
{
test_check_eq_2(1234.6f, 1234.6f);
});
}
void test_check_eq_3(double value_1, double value_2)
{
CV_CheckEQ(value_1, value_2, "Validation check failed (double)");
}
TEST(Core_Check, testEQ_double_fail)
{
try
{
test_check_eq_3(1234.5, 1234.56);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation check failed (double) (expected: 'value_1 == value_2'), where\n"
"> 'value_1' is 1234.5\n" // TODO Locale handling (use LC_ALL=C on Linux)
"> must be equal to\n"
"> 'value_2' is 1234.56\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testEQ_double_pass)
{
EXPECT_NO_THROW(
{
test_check_eq_3(1234.0f, 1234.0f);
});
}
void test_check_ne_1(int value_1, int value_2)
{
CV_CheckNE(value_1, value_2, "Validation NE check failed");
}
TEST(Core_Check, testNE_int_fail)
{
try
{
test_check_ne_1(123, 123);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation NE check failed (expected: 'value_1 != value_2'), where\n"
"> 'value_1' is 123\n"
"> must be not equal to\n"
"> 'value_2' is 123\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testNE_int_pass)
{
EXPECT_NO_THROW(
{
test_check_ne_1(123, 1234);
});
}
void test_check_le_1(int value_1, int value_2)
{
CV_CheckLE(value_1, value_2, "Validation LE check failed");
}
TEST(Core_Check, testLE_int_fail)
{
try
{
test_check_le_1(1234, 123);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation LE check failed (expected: 'value_1 <= value_2'), where\n"
"> 'value_1' is 1234\n"
"> must be less than or equal to\n"
"> 'value_2' is 123\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testLE_int_pass)
{
EXPECT_NO_THROW(
{
test_check_le_1(1234, 1234);
});
EXPECT_NO_THROW(
{
test_check_le_1(123, 1234);
});
}
void test_check_lt_1(int value_1, int value_2)
{
CV_CheckLT(value_1, value_2, "Validation LT check failed");
}
TEST(Core_Check, testLT_int_fail)
{
try
{
test_check_lt_1(1234, 123);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation LT check failed (expected: 'value_1 < value_2'), where\n"
"> 'value_1' is 1234\n"
"> must be less than\n"
"> 'value_2' is 123\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testLT_int_fail_eq)
{
try
{
test_check_lt_1(123, 123);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation LT check failed (expected: 'value_1 < value_2'), where\n"
"> 'value_1' is 123\n"
"> must be less than\n"
"> 'value_2' is 123\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testLT_int_pass)
{
EXPECT_NO_THROW(
{
test_check_lt_1(123, 1234);
});
}
void test_check_ge_1(int value_1, int value_2)
{
CV_CheckGE(value_1, value_2, "Validation GE check failed");
}
TEST(Core_Check, testGE_int_fail)
{
try
{
test_check_ge_1(123, 1234);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation GE check failed (expected: 'value_1 >= value_2'), where\n"
"> 'value_1' is 123\n"
"> must be greater than or equal to\n"
"> 'value_2' is 1234\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testGE_int_pass)
{
EXPECT_NO_THROW(
{
test_check_ge_1(1234, 1234);
});
EXPECT_NO_THROW(
{
test_check_ge_1(1234, 123);
});
}
void test_check_gt_1(int value_1, int value_2)
{
CV_CheckGT(value_1, value_2, "Validation GT check failed");
}
TEST(Core_Check, testGT_int_fail)
{
try
{
test_check_gt_1(123, 1234);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation GT check failed (expected: 'value_1 > value_2'), where\n"
"> 'value_1' is 123\n"
"> must be greater than\n"
"> 'value_2' is 1234\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testGT_int_fail_eq)
{
try
{
test_check_gt_1(123, 123);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Validation GT check failed (expected: 'value_1 > value_2'), where\n"
"> 'value_1' is 123\n"
"> must be greater than\n"
"> 'value_2' is 123\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
TEST(Core_Check, testGT_int_pass)
{
EXPECT_NO_THROW(
{
test_check_gt_1(1234, 123);
});
}
void test_check_MatType_1(int src_type)
{
CV_CheckTypeEQ(src_type, CV_32FC1, "Unsupported source type");
}
TEST(Core_Check, testMatType_pass)
{
EXPECT_NO_THROW(
{
test_check_MatType_1(CV_MAKE_TYPE(CV_32F, 1));
});
}
TEST(Core_Check, testMatType_fail_1)
{
try
{
test_check_MatType_1(CV_8UC1);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Unsupported source type (expected: 'src_type == CV_32FC1'), where\n"
"> 'src_type' is 0 (CV_8UC1)\n"
"> must be equal to\n"
"> 'CV_32FC1' is 5 (CV_32FC1)\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
void test_check_MatType_2(int src_type)
{
CV_CheckType(src_type, src_type == CV_32FC1 || src_type == CV_32FC3, "Unsupported src");
}
TEST(Core_Check, testMatType_fail_2)
{
try
{
test_check_MatType_2(CV_8UC1);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Unsupported src:\n"
"> 'src_type == CV_32FC1 || src_type == CV_32FC3'\n"
"> where\n> 'src_type' is 0 (CV_8UC1)\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
void test_check_MatDepth_1(int src_depth)
{
CV_CheckDepthEQ(src_depth, CV_32F, "Unsupported source depth");
}
TEST(Core_Check, testMatDepth_pass)
{
EXPECT_NO_THROW(
{
test_check_MatDepth_1(CV_MAKE_TYPE(CV_32F, 1));
});
}
TEST(Core_Check, testMatDepth_fail_1)
{
try
{
test_check_MatDepth_1(CV_8U);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Unsupported source depth (expected: 'src_depth == CV_32F'), where\n"
"> 'src_depth' is 0 (CV_8U)\n"
"> must be equal to\n"
"> 'CV_32F' is 5 (CV_32F)\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
void test_check_MatDepth_2(int src_depth)
{
CV_CheckDepth(src_depth, src_depth == CV_32F || src_depth == CV_64F, "Unsupported src");
}
TEST(Core_Check, testMatDepth_fail_2)
{
try
{
test_check_MatDepth_2(CV_8U);
FAIL() << "Unreachable code called";
}
catch (const cv::Exception& e)
{
EXPECT_STREQ(e.err.c_str(),
"> Unsupported src:\n"
"> 'src_depth == CV_32F || src_depth == CV_64F'\n"
"> where\n> 'src_depth' is 0 (CV_8U)\n"
);
}
catch (const std::exception& e)
{
FAIL() << "Unexpected C++ exception: " << e.what();
}
catch (...)
{
FAIL() << "Unexpected unknown exception";
}
}
}} // namespace

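Typical intended use of the CV_Check* macros whose wording these tests pin down; the function below is an illustrative sketch, not OpenCV code:

static void resizePlane(const cv::Mat& src, int interpolation)
{
    // On failure these throw cv::Exception whose 'err' field carries the
    // multi-line "> ..." description verified above.
    CV_CheckTypeEQ(src.type(), CV_32FC1, "Only single-channel float input is supported");
    CV_CheckGE(interpolation, 0, "Interpolation id must be non-negative");
    // ... actual work ...
}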
@@ -559,7 +559,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
    {
    public:
        float pnorm, epsilon;
-        bool acrossSpatial;
+        CV_DEPRECATED bool acrossSpatial;

        static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
    };

@@ -80,7 +80,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
    enum Target
    {
        DNN_TARGET_CPU,
-        DNN_TARGET_OPENCL
+        DNN_TARGET_OPENCL,
+        DNN_TARGET_OPENCL_FP16
    };

    /** @brief This class provides all data needed to initialize layer.

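From user code the new enumerator is selected through the usual Net API; a sketch (model file names are placeholders). The FP16 target changes weight storage and inference precision only; input and output blobs remain CV_32F:

cv::dnn::Net net = cv::dnn::readNetFromCaffe("deploy.prototxt", "model.caffemodel");
net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL_FP16);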
@@ -318,6 +318,7 @@ for node in graph_def.node:
        node.input.pop()
        node.input.pop()
        node.input.append(layer_256_1_relu1.name)
+        node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')
        break

softmaxShape = NodeDef()

@@ -13,7 +13,7 @@
namespace opencv_test {

CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE)
-CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL)
+CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16)

class DNNTestNetwork : public ::perf::TestBaseWithParam< tuple<DNNBackend, DNNTarget> >
{

@@ -41,8 +41,6 @@ public:
                throw cvtest::SkipTestException("OpenCL is not available/disabled in OpenCV");
            }
        }
-        if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
-            throw SkipTestException("Skip OpenCL target of Inference Engine backend");

        randu(input, 0.0f, 1.0f);

@@ -89,24 +87,32 @@ public:
PERF_TEST_P_(DNNTestNetwork, AlexNet)
{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
    processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
            "alexnet.yml", Mat(cv::Size(227, 227), CV_32FC3));
}

PERF_TEST_P_(DNNTestNetwork, GoogLeNet)
{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
    processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
            "", Mat(cv::Size(224, 224), CV_32FC3));
}

PERF_TEST_P_(DNNTestNetwork, ResNet_50)
{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
    processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
            "resnet_50.yml", Mat(cv::Size(224, 224), CV_32FC3));
}

PERF_TEST_P_(DNNTestNetwork, SqueezeNet_v1_1)
{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
    processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt",
            "squeezenet_v1_1.yml", Mat(cv::Size(227, 227), CV_32FC3));
}

@@ -135,14 +141,18 @@ PERF_TEST_P_(DNNTestNetwork, SSD)
PERF_TEST_P_(DNNTestNetwork, OpenFace)
{
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
    processNet("dnn/openface_nn4.small2.v1.t7", "", "",
            Mat(cv::Size(96, 96), CV_32FC3));
}

PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe)
{
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
    processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "",
            Mat(cv::Size(300, 300), CV_32FC3));
}

@@ -150,7 +160,8 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe)
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow)
{
    if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
-        backend == DNN_BACKEND_HALIDE)
+        backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
        throw SkipTestException("");
    processNet("dnn/ssd_mobilenet_v1_coco.pb", "ssd_mobilenet_v1_coco.pbtxt", "",
            Mat(cv::Size(300, 300), CV_32FC3));

@@ -158,7 +169,9 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow)
PERF_TEST_P_(DNNTestNetwork, DenseNet_121)
{
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
    processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", "",
            Mat(cv::Size(224, 224), CV_32FC3));
}

@@ -189,7 +202,7 @@ PERF_TEST_P_(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
PERF_TEST_P_(DNNTestNetwork, opencv_face_detector)
{
    if (backend == DNN_BACKEND_HALIDE ||
-        backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL)
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
        throw SkipTestException("");
    processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt", "",
            Mat(cv::Size(300, 300), CV_32FC3));

@@ -197,7 +210,9 @@ PERF_TEST_P_(DNNTestNetwork, opencv_face_detector)
PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
{
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
    processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", "",
            Mat(cv::Size(300, 300), CV_32FC3));
}

@@ -209,6 +224,8 @@ const tuple<DNNBackend, DNNTarget> testCases[] = {
#endif
#ifdef HAVE_INF_ENGINE
    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
+    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
+    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
#endif
    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_DEFAULT, DNN_TARGET_CPU),
    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)

@@ -50,7 +50,7 @@
syntax = "proto2";

package opencv_caffe;

-// NVidia's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe:
+// NVIDIA's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe:
// Math and storage types
enum Type {
  DOUBLE = 0;

@@ -72,10 +72,10 @@ message BlobProto {
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];

-  // NVidia's Caffe fields begin.
+  // NVIDIA's Caffe fields begin.
  optional Type raw_data_type = 10;
  optional bytes raw_data = 12 [packed = false];
-  // NVidia's Caffe fields end.
+  // NVIDIA's Caffe fields end.

  // 4D dimensions -- deprecated. Use "shape" instead.
  optional int32 num = 1 [default = 0];

@@ -1154,7 +1154,7 @@ struct Net::Impl
                ld.skip = true;
            }
            layers[lastLayerId].skip = false;
-            ieNode->net->init();
+            ieNode->net->init(preferableTarget);
            return;
        }

@@ -1167,17 +1167,17 @@ struct Net::Impl
        for (it = layers.begin(); it != layers.end(); ++it)
        {
            LayerData &ld = it->second;
-            ld.skip = true;  // Initially skip all Inference Engine supported layers.
+            bool fused = ld.skip && ld.id != 0;

            Ptr<Layer> layer = ld.layerInstance;
            if (!layer->supportBackend(preferableBackend))
            {
                addInfEngineNetOutputs(ld);
+                ld.skip = false;
                net = Ptr<InfEngineBackendNet>();
                netBlobsWrappers.clear();
                continue;
            }
+            ld.skip = true;  // Initially skip all Inference Engine supported layers.

            // Create a new network if one of inputs from different Inference Engine graph.
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)

@@ -1217,18 +1217,15 @@ struct Net::Impl
            }
            netBlobsWrappers[ld.id] = ld.outputBlobsWrappers[0];

-            bool fused = false;
            Ptr<BackendNode> node;
            if (!net.empty())
            {
-                // Try to fuse.
-                bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
-                               ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
-                if (inPlace)
+                if (fused)
                {
-                    node = layer->tryAttach(layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]);
-                    fused = !node.empty();
-                    if (fused)
-                        ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
+                    bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
+                                   ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
+                    CV_Assert(inPlace);
+                    node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
+                    ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
                }
            }

@@ -1247,6 +1244,19 @@ struct Net::Impl
            CV_Assert(!ieNode.empty());
            ieNode->net = net;

+            if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !fused)
+            {
+                ieNode->layer->precision = InferenceEngine::Precision::FP16;
+                auto weightableLayer = std::dynamic_pointer_cast<InferenceEngine::WeightableLayer>(ieNode->layer);
+                if (weightableLayer)
+                {
+                    if (weightableLayer->_weights)
+                        weightableLayer->_weights = convertFp16(weightableLayer->_weights);
+                    if (weightableLayer->_biases)
+                        weightableLayer->_biases = convertFp16(weightableLayer->_biases);
+                }
+            }
+
            ieNode->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers);
            net->addBlobs(ld.inputBlobsWrappers);
            net->addBlobs(ld.outputBlobsWrappers);

@@ -1276,7 +1286,7 @@ struct Net::Impl
                if (!ieNode->net->isInitialized())
                {
-                    ieNode->net->init();
+                    ieNode->net->init(preferableTarget);
                    ld.skip = false;
                }
            }

@@ -1380,7 +1390,8 @@ struct Net::Impl
    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
    {
-        if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT)
+        if( !fusion || preferableBackend != DNN_BACKEND_DEFAULT &&
+                       preferableBackend != DNN_BACKEND_INFERENCE_ENGINE)
            return;

        CV_TRACE_FUNCTION();

@@ -1407,7 +1418,7 @@ struct Net::Impl
            // some other layers.

            // TODO: OpenCL target support more fusion styles.
-            if ( preferableTarget == DNN_TARGET_OPENCL &&
+            if ( preferableBackend == DNN_BACKEND_DEFAULT && preferableTarget == DNN_TARGET_OPENCL &&
                 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
                 ld.layerInstance->type != "MVN")) )
                continue;

@@ -1442,6 +1453,9 @@ struct Net::Impl
                        break;
                    }
                }

+                if (preferableBackend != DNN_BACKEND_DEFAULT)
+                    continue;  // Go to the next layer.
+
                // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
                if ( preferableTarget != DNN_TARGET_OPENCL ||
                     (preferableTarget == DNN_TARGET_OPENCL &&

@@ -1583,6 +1597,9 @@ struct Net::Impl
                }
            }

+            if (preferableBackend != DNN_BACKEND_DEFAULT)
+                continue;  // Go to the next layer.
+
            // the optimization #2. if there is no layer that takes max pooling layer's computed
            // max indices (and only some semantical segmentation networks might need this;
            // many others only take the maximum values), then we switch the max pooling

@@ -1944,7 +1961,8 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
        ld.layerInstance = Ptr<Layer>(new InfEngineBackendLayer(it.second));
        ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;

-        cvNet.connect(0, 0, lid, 0);
+        for (int i = 0; i < inputsNames.size(); ++i)
+            cvNet.connect(0, i, lid, i);
    }
    cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);

@@ -234,19 +234,6 @@ public:
#endif  // HAVE_HALIDE
            break;
        }
-        case DNN_BACKEND_INFERENCE_ENGINE:
-        {
-#ifdef HAVE_INF_ENGINE
-            auto base = node.dynamicCast<InfEngineBackendNode>();
-            auto conv = std::dynamic_pointer_cast<InferenceEngine::ConvolutionLayer>(base->layer);
-            if (conv)
-            {
-                fuseConvWeights(conv, weights_, bias_);
-                return base;
-            }
-#endif  // HAVE_INF_ENGINE
-            break;
-        }
    }
    return Ptr<BackendNode>();
}

@@ -287,8 +274,9 @@ public:
        lp.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));

-        ieLayer->_weights = wrapToInfEngineBlob(weights_);
-        ieLayer->_biases = wrapToInfEngineBlob(bias_);
+        const int numChannels = weights_.total();
+        ieLayer->_weights = wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C);
+        ieLayer->_biases = wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C);

        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE

@@ -40,6 +40,7 @@
//
//M*/
#include "../precomp.hpp"
+#include "../op_inf_engine.hpp"

namespace cv
{

@@ -53,6 +54,12 @@ public:
        setParamsFrom(params);
    }

+    virtual bool supportBackend(int backendId) CV_OVERRIDE
+    {
+        return backendId == DNN_BACKEND_DEFAULT ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
+    }
+
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,

@@ -104,6 +111,19 @@ public:
            if (outputs[i].data != inputs[i]->data)
                inputs[i]->copyTo(outputs[i]);
    }

+    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
+    {
+#ifdef HAVE_INF_ENGINE
+        InferenceEngine::LayerParams lp;
+        lp.name = name;
+        lp.type = "Split";
+        lp.precision = InferenceEngine::Precision::FP32;
+        std::shared_ptr<InferenceEngine::SplitLayer> ieLayer(new InferenceEngine::SplitLayer(lp));
+        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif  // HAVE_INF_ENGINE
+        return Ptr<BackendNode>();
+    }
};

Ptr<Layer> BlankLayer::create(const LayerParams& params)

@@ -173,21 +173,21 @@ public:
    std::vector<float> biasvec;
    std::vector<float> reluslope;
    Ptr<ActivationLayer> activ;
+    bool newWeightAndBias;
+    bool fusedBias;

#ifdef HAVE_OPENCL
    Ptr<OCL4DNNConvSpatial<float> > convolutionOp;
    std::vector<UMat> umat_blobs;
-    bool fusedBias;
-    bool newWeightAndBias;
    bool newActiv;
    ocl4dnnFusedActiv_t activType;
    float power;
#endif

    ConvolutionLayerImpl(const LayerParams &params) : BaseConvolutionLayerImpl(params)
    {
-#ifdef HAVE_OPENCL
-        fusedBias = false;
        newWeightAndBias = false;
+        fusedBias = false;
+#ifdef HAVE_OPENCL
        newActiv = false;
        activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
        power = 0.f;
#endif

@@ -350,10 +350,8 @@ public:
                biasvec[i] += b.at<float>(i);
        }

-#ifdef HAVE_OPENCL
        newWeightAndBias = !w.empty() || !b.empty();
        fusedBias = hasBias() || !b.empty();
-#endif
        biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
    }

@@ -433,9 +431,31 @@ public:
        ieLayer->_dilation_y = dilation.height;
        ieLayer->_group = group;

-        ieLayer->_weights = wrapToInfEngineBlob(blobs[0]);
-        if (hasBias())
-            ieLayer->_biases = wrapToInfEngineBlob(blobs[1]);
+        ieLayer->_weights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
+        if (newWeightAndBias)
+        {
+            if (weightsMat.isContinuous())
+            {
+                Mat fusedWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size);
+                ieLayer->_weights = wrapToInfEngineBlob(fusedWeights, InferenceEngine::Layout::OIHW);
+            }
+            else
+            {
+                ieLayer->_weights = InferenceEngine::make_shared_blob<float>(
+                                    InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
+                                    ieLayer->_weights->dims());
+                ieLayer->_weights->allocate();
+
+                Mat newWeights = infEngineBlobToMat(ieLayer->_weights).reshape(1, outCn);
+                Mat fusedWeights = weightsMat.colRange(0, newWeights.cols);
+                fusedWeights.copyTo(newWeights);
+            }
+        }
+        if (hasBias() || fusedBias)
+        {
+            Mat biasesMat({outCn}, CV_32F, &biasvec[0]);
+            ieLayer->_biases = wrapToInfEngineBlob(biasesMat, {outCn}, InferenceEngine::Layout::C);
+        }
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
        return Ptr<BackendNode>();

@@ -412,9 +412,9 @@ public:
        std::shared_ptr<InferenceEngine::FullyConnectedLayer> ieLayer(new InferenceEngine::FullyConnectedLayer(lp));

        ieLayer->_out_num = blobs[0].size[0];
-        ieLayer->_weights = wrapToInfEngineBlob(blobs[0]);
+        ieLayer->_weights = wrapToInfEngineBlob(blobs[0], {blobs[0].size[0], blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW);
        if (blobs.size() > 1)
-            ieLayer->_biases = wrapToInfEngineBlob(blobs[1]);
+            ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {ieLayer->_out_num}, InferenceEngine::Layout::C);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
        return Ptr<BackendNode>();

@@ -77,7 +77,20 @@ bool getParameter(const LayerParams &params, const std::string& nameBase, const
{
    if (params.has(nameAll_))
    {
-        parameterH = parameterW = params.get<int>(nameAll_);
+        DictValue param = params.get(nameAll_);
+        parameterH = param.get<int>(0);
+        if (param.size() == 1)
+        {
+            parameterW = parameterH;
+        }
+        else if (param.size() == 2)
+        {
+            parameterW = param.get<int>(1);
+        }
+        else
+        {
+            return false;
+        }
        return true;
    }
    else

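After this change a size-like parameter may carry either a single value, applied to both dimensions, or an (H, W) pair; longer lists are rejected. A sketch of both spellings (the parameter name is illustrative, and DictValue::arrayInt is assumed to be the usual dnn dictionary helper):

cv::dnn::LayerParams params;
params.set("kernel_size", 3);                   // parsed as kernelH = kernelW = 3
int hw[] = {3, 5};
params.set("kernel_size", cv::dnn::DictValue::arrayInt(hw, 2));  // kernelH = 3, kernelW = 5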
@@ -42,6 +42,7 @@

#include "../precomp.hpp"
#include "layers_common.hpp"
+#include "../op_inf_engine.hpp"

namespace cv { namespace dnn {

@@ -54,9 +55,19 @@ public:
        pnorm = params.get<float>("p", 2);
        epsilon = params.get<float>("eps", 1e-10f);
        acrossSpatial = params.get<bool>("across_spatial", true);
+        startAxis = params.get<int>("start_axis", 1);
+        CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
+        endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
        CV_Assert(pnorm > 0);
    }

+    virtual bool supportBackend(int backendId) CV_OVERRIDE
+    {
+        return backendId == DNN_BACKEND_DEFAULT ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
+               pnorm == 2 && !blobs.empty();
+    }
+
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,

@@ -85,20 +96,26 @@ public:
        const UMat& inp0 = inputs[0];
        UMat& buffer = internals[0];

-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
+        MatShape s = shape(1, inputs[0].total());
+        UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
+        UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
        for (size_t i = 0; i < num; ++i)
        {
-            MatShape s = shape(channels, channelSize);
-            UMat src = inputs[i].reshape(1, s.size(), &s[0]);
-            UMat dst = outputs[i].reshape(1, s.size(), &s[0]);
+            s = shape(numPlanes, planeSize);
+            UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
+            UMat dst = out.row(i).reshape(1, s.size(), &s[0]);

            UMat abs_mat;
            absdiff(src, cv::Scalar::all(0), abs_mat);
            pow(abs_mat, pnorm, buffer);

-            if (acrossSpatial)
+            if (planeSize == 1)
            {
                // add eps to avoid overflow
                float absSum = sum(buffer)[0] + epsilon;

@@ -114,7 +131,7 @@ public:
                // compute inverted norm to call multiply instead divide
                cv::pow(norm, -1.0f / pnorm, norm);

-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

@@ -130,7 +147,7 @@ public:
                else
                {
                    // _scale: _channels x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }

@@ -162,17 +179,22 @@ public:
        const Mat& inp0 = *inputs[0];
        Mat& buffer = internals[0];

-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+
+        const float* inpData = inp0.ptr<float>();
+        float* outData = outputs[0].ptr<float>();
+
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
        for (size_t n = 0; n < num; ++n)
        {
-            Mat src = Mat(channels, channelSize, CV_32F, (void*)inp0.ptr<float>(n));
-            Mat dst = Mat(channels, channelSize, CV_32F, (void*)outputs[0].ptr<float>(n));
+            Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
+            Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);

            cv::pow(abs(src), pnorm, buffer);

-            if (acrossSpatial)
+            if (planeSize == 1)
            {
                // add eps to avoid overflow
                float absSum = sum(buffer)[0] + epsilon;

@@ -188,7 +210,7 @@ public:
                // compute inverted norm to call multiply instead divide
                cv::pow(norm, -1.0f / pnorm, norm);

-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                multiply(src, buffer, dst);
            }

@@ -204,13 +226,40 @@ public:
                else
                {
                    // _scale: _channels x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                    repeat(scale, 1, dst.cols, buffer);
                    multiply(dst, buffer, dst);
                }
            }
+            inpData += numPlanes * planeSize;
+            outData += numPlanes * planeSize;
        }
    }

+    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
+    {
+#ifdef HAVE_INF_ENGINE
+        InferenceEngine::LayerParams lp;
+        lp.name = name;
+        lp.type = "Normalize";
+        lp.precision = InferenceEngine::Precision::FP32;
+        std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
+
+        CV_Assert(!blobs.empty());
+
+        ieLayer->params["eps"] = format("%f", epsilon);
+        ieLayer->params["across_spatial"] = acrossSpatial ? "1" : "0";
+        ieLayer->params["channel_shared"] = blobs[0].total() == 1 ? "1" : "0";
+
+        const int numChannels = blobs[0].total();
+        ieLayer->blobs["weights"] = wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C);
+        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
+#endif  // HAVE_INF_ENGINE
+        return Ptr<BackendNode>();
+    }
+
+private:
+    int startAxis, endAxis;
};

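How start_axis/end_axis generalize the old across_spatial flag, as a worked example on an N x C x H x W input:

// startAxis = 1, endAxis = -1 (old across_spatial=true):
//   num = N, numPlanes = C*H*W, planeSize = 1  -> one norm per sample
// startAxis = endAxis = 1 (old across_spatial=false):
//   num = N, numPlanes = C, planeSize = H*W    -> one norm per spatial position
// In both cases planeSize == 1 is exactly the condition that selects the
// "single sum" branch above.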
@@ -132,20 +132,6 @@ public:
#endif  // HAVE_HALIDE
            break;
        }
-        case DNN_BACKEND_INFERENCE_ENGINE:
-        {
-#ifdef HAVE_INF_ENGINE
-            auto base = node.dynamicCast<InfEngineBackendNode>();
-            auto conv = std::dynamic_pointer_cast<InferenceEngine::ConvolutionLayer>(base->layer);
-            if (conv)
-            {
-                Mat bias = hasBias ? blobs[1] : Mat();
-                fuseConvWeights(conv, blobs[0], bias);
-                return base;
-            }
-#endif  // HAVE_INF_ENGINE
-            break;
-        }
    }
    return Ptr<BackendNode>();
}

@@ -192,9 +178,10 @@ public:
        lp.precision = InferenceEngine::Precision::FP32;
        std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));

-        ieLayer->_weights = wrapToInfEngineBlob(blobs[0]);
+        const int numChannels = blobs[0].total();
+        ieLayer->_weights = wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C);
        if (hasBias)
-            ieLayer->_biases = wrapToInfEngineBlob(blobs[1]);
+            ieLayer->_biases = wrapToInfEngineBlob(blobs[1], {numChannels}, InferenceEngine::Layout::C);
        return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE

@@ -90,27 +90,6 @@ public:
        }
    }

-    virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
-    {
-        switch (node->backendId)
-        {
-            case DNN_BACKEND_INFERENCE_ENGINE:
-            {
-#ifdef HAVE_INF_ENGINE
-                auto base = node.dynamicCast<InfEngineBackendNode>();
-                auto conv = std::dynamic_pointer_cast<InferenceEngine::ConvolutionLayer>(base->layer);
-                if (conv)
-                {
-                    fuseConvWeights(conv, Mat(), blobs[0]);
-                    return base;
-                }
-#endif  // HAVE_INF_ENGINE
-                break;
-            }
-        }
-        return Ptr<BackendNode>();
-    }
-
    virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE

@@ -18,6 +18,11 @@ namespace cv { namespace dnn {

#ifdef HAVE_INF_ENGINE

+static int infEngineVersion()
+{
+    return std::atoi(InferenceEngine::GetInferenceEngineVersion()->buildNumber);
+}
+
InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::CNNLayerPtr& _layer)
    : BackendNode(DNN_BACKEND_INFERENCE_ENGINE), layer(_layer) {}

@@ -58,23 +63,37 @@ static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std:
{
    std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
    std::reverse(reversedShape.begin(), reversedShape.end());
-    return InferenceEngine::DataPtr(
-        new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32,
-                                  InferenceEngine::Layout::ANY)
-    );
+    if (infEngineVersion() > 5855)
+    {
+        InferenceEngine::Layout l = InferenceEngine::Layout::ANY;
+        if (m.dims == 4)
+            l = InferenceEngine::Layout::NCHW;
+        else if (m.dims == 2)
+            l = InferenceEngine::Layout::NC;
+        return InferenceEngine::DataPtr(
+            new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32, l)
+        );
+    }
+    else
+    {
+        return InferenceEngine::DataPtr(
+            new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32)
+        );
+    }
}

-InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape)
+InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape,
+                                                       InferenceEngine::Layout layout)
{
    return InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
-                                                    shape, (float*)m.data);
+                                                    layout, shape, (float*)m.data);
}

-InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m)
+InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout)
{
    std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
    std::reverse(reversedShape.begin(), reversedShape.end());
-    return wrapToInfEngineBlob(m, reversedShape);
+    return wrapToInfEngineBlob(m, reversedShape, layout);
}

InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr)

@@ -109,10 +128,14 @@ void InfEngineBackendWrapper::setHostDirty()

InfEngineBackendNet::InfEngineBackendNet()
{
+    targetDevice = InferenceEngine::TargetDevice::eCPU;
+    precision = InferenceEngine::Precision::FP32;
}

InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net)
{
+    targetDevice = InferenceEngine::TargetDevice::eCPU;
+    precision = InferenceEngine::Precision::FP32;
    inputs = net.getInputsInfo();
    outputs = net.getOutputsInfo();
    layers.resize(net.layerCount());  // A hack to execute InfEngineBackendNet::layerCount correctly.

@@ -126,9 +149,14 @@ void InfEngineBackendNet::Release() noexcept
    outputs.clear();
}

+void InfEngineBackendNet::setPrecision(InferenceEngine::Precision p) noexcept
+{
+    precision = p;
+}
+
InferenceEngine::Precision InfEngineBackendNet::getPrecision() noexcept
{
-    return InferenceEngine::Precision::FP32;
+    return precision;
}

// Assume that outputs of network is unconnected blobs.

@@ -161,9 +189,8 @@ InferenceEngine::InputInfo::Ptr InfEngineBackendNet::getInput(const std::string
    return it->second;
}

-void InfEngineBackendNet::getName(char *pName, size_t len) noexcept
+void InfEngineBackendNet::getName(char*, size_t) noexcept
{
-    CV_Error(Error::StsNotImplemented, "");
}

size_t InfEngineBackendNet::layerCount() noexcept

@@ -213,13 +240,15 @@ InfEngineBackendNet::getLayerByName(const char *layerName, InferenceEngine::CNNL
void InfEngineBackendNet::setTargetDevice(InferenceEngine::TargetDevice device) noexcept
{
-    if (device != InferenceEngine::TargetDevice::eCPU)
+    if (device != InferenceEngine::TargetDevice::eCPU &&
+        device != InferenceEngine::TargetDevice::eGPU)
        CV_Error(Error::StsNotImplemented, "");
+    targetDevice = device;
}

InferenceEngine::TargetDevice InfEngineBackendNet::getTargetDevice() noexcept
{
-    return InferenceEngine::TargetDevice::eCPU;
+    return targetDevice;
}

InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t size) noexcept

@@ -234,7 +263,7 @@ size_t InfEngineBackendNet::getBatchSize() const noexcept
    return 0;
}

-void InfEngineBackendNet::init()
+void InfEngineBackendNet::init(int targetId)
{
    if (inputs.empty())
    {

@@ -307,6 +336,15 @@ void InfEngineBackendNet::init()
        outBlobs[it.first] = allBlobs[it.first];
    }

+    switch (targetId)
+    {
+    case DNN_TARGET_CPU: setTargetDevice(InferenceEngine::TargetDevice::eCPU); break;
+    case DNN_TARGET_OPENCL_FP16: setPrecision(InferenceEngine::Precision::FP16);  // Fallback to the next.
+    case DNN_TARGET_OPENCL: setTargetDevice(InferenceEngine::TargetDevice::eGPU); break;
+    default:
+        CV_Error(Error::StsError, format("Unknown target identifier: %d", targetId));
+    }
+
    if (!isInitialized())
        initPlugin(*this);
}

@@ -317,10 +355,9 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
    InferenceEngine::StatusCode status;
    InferenceEngine::ResponseDesc resp;

-    const InferenceEngine::Version* v = InferenceEngine::GetInferenceEngineVersion();
-    plugin = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(InferenceEngine::TargetDevice::eCPU);
-    if (std::atoi(v->buildNumber) > 5855)
+    plugin = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(targetDevice);
+    if (infEngineVersion() > 5855 && targetDevice == InferenceEngine::TargetDevice::eCPU)
    {
#ifdef _WIN32
        InferenceEngine::IExtensionPtr extension =

@@ -360,7 +397,7 @@ void InfEngineBackendNet::forward()
        CV_Error(Error::StsAssert, resp.msg);
}

-static inline Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
+Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
{
    // NOTE: Inference Engine sizes are reversed.
    std::vector<size_t> dims = blob->dims();

@@ -369,56 +406,6 @@ static inline Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
    return Mat(size, CV_32F, (void*)blob->buffer());
}

-void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& conv,
-                     const Mat& w, const Mat& b)
-{
-    CV_Assert(!w.empty() || !b.empty());
-    if (!w.empty())
-    {
-        // Get convolution's weights. Clone the data because Inference Engine can host it
-        // and conv->_weights->allocate() below will deallocate it.
-        Mat originWeights = infEngineBlobToMat(conv->_weights).clone();
-        // Create new weights blob.
-        conv->_weights = InferenceEngine::make_shared_blob<float>(
-                            InferenceEngine::Precision::FP32, conv->_weights->dims());
-        conv->_weights->allocate();
-        // Convolution weights have OIHW data layout.
-        // (conv(I) + b1 ) * w + b2
-        // w*conv(I) + b1 * w + b2
-        Mat fusedWeights = infEngineBlobToMat(conv->_weights);
-        const int numChannels = fusedWeights.size[0];
-        // Mat weights = blobs[0].reshape(1, 1);
-        // Mat bias = hasBias ? blobs[1].reshape(1, 1) : Mat();
-        CV_Assert(numChannels == w.total());
-        CV_Assert(b.empty() || numChannels == b.total());
-        for (int i = 0; i < numChannels; ++i)
-        {
-            cv::multiply(slice(originWeights, i), w.at<float>(i), slice(fusedWeights, i));
-        }
-    }
-    if (conv->_biases)
-    {
-        // The same for biases.
-        Mat originBiases = infEngineBlobToMat(conv->_biases).clone();
-        conv->_biases = InferenceEngine::make_shared_blob<float>(
-                            InferenceEngine::Precision::FP32, conv->_biases->dims());
-        conv->_biases->allocate();
-        Mat fusedBiases = infEngineBlobToMat(conv->_biases);
-        originBiases.copyTo(fusedBiases);
-
-        if (!w.empty())
-            cv::multiply(w.reshape(1, fusedBiases.dims, &fusedBiases.size[0]), fusedBiases, fusedBiases);
-        if (!b.empty())
-            cv::add(fusedBiases, b.reshape(1, fusedBiases.dims, &fusedBiases.size[0]), fusedBiases);
-    }
-    else
-        conv->_biases = wrapToInfEngineBlob(b);
-}
-
InfEngineBackendLayer::InfEngineBackendLayer(const InferenceEngine::DataPtr& output_)
{
    output = output_;

@@ -454,6 +441,16 @@ void InfEngineBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArra
    CV_Error(Error::StsInternal, "Choose Inference Engine as a preferable backend.");
}

+InferenceEngine::TBlob<int16_t>::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob)
+{
+    auto halfs = InferenceEngine::make_shared_blob<int16_t>(InferenceEngine::Precision::FP16, blob->layout(), blob->dims());
+    halfs->allocate();
+    Mat floatsData(1, blob->size(), CV_32F, blob->buffer());
+    Mat halfsData(1, blob->size(), CV_16SC1, halfs->buffer());
+    convertFp16(floatsData, halfsData);
+    return halfs;
+}
+
#endif  // HAVE_INF_ENGINE

bool haveInfEngine()

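The convertFp16() helper above delegates to the core converter cv::convertFp16, which packs IEEE half-precision values into CV_16S storage; a standalone sketch:

cv::Mat floats(1, 8, CV_32F), halfs;
cv::randu(floats, -1.0f, 1.0f);
cv::convertFp16(floats, halfs);  // halfs becomes CV_16S holding half-float bit patterns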
@@ -32,6 +32,8 @@ public:

    virtual void Release() noexcept CV_OVERRIDE;

+    void setPrecision(InferenceEngine::Precision p) noexcept;
+
    virtual InferenceEngine::Precision getPrecision() noexcept CV_OVERRIDE;

    virtual void getOutputsInfo(InferenceEngine::OutputsDataMap &out) noexcept /*CV_OVERRIDE*/;

@@ -68,7 +70,7 @@ public:

    virtual size_t getBatchSize() const noexcept CV_OVERRIDE;

-    void init();
+    void init(int targetId);

    void addBlobs(const std::vector<Ptr<BackendWrapper> >& wrappers);

@@ -83,6 +85,8 @@ private:
    InferenceEngine::BlobMap inpBlobs;
    InferenceEngine::BlobMap outBlobs;
    InferenceEngine::BlobMap allBlobs;
+    InferenceEngine::TargetDevice targetDevice;
+    InferenceEngine::Precision precision;
    InferenceEngine::InferenceEnginePluginPtr plugin;

    void initPlugin(InferenceEngine::ICNNNetwork& net);

@@ -116,15 +120,17 @@ public:
    InferenceEngine::TBlob<float>::Ptr blob;
};

-InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m);
+InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);

-InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape);
+InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape, InferenceEngine::Layout layout);

InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr);

-// Fuses convolution weights and biases with channel-wise scales and shifts.
-void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& conv,
-                     const Mat& w, const Mat& b = Mat());
+Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob);
+
+// Convert Inference Engine blob with FP32 precision to FP16 precision.
+// Allocates memory for a new blob.
+InferenceEngine::TBlob<int16_t>::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob);

// This is a fake class to run networks from Model Optimizer. Objects of that
// class simulate responses of layers are imported by OpenCV and supported by

@@ -151,7 +157,6 @@ private:
    InferenceEngine::DataPtr output;
};

#endif  // HAVE_INF_ENGINE

bool haveInfEngine();

@ -80,14 +80,16 @@ public:
{ {
CV_Assert(inpId < node.input_size()); CV_Assert(inpId < node.input_size());
std::string name = node.input(inpId); std::string name = node.input(inpId);
// If operation produces several tensors, they are specified by index
// after ':' character. In example, "input:0".
name = name.substr(0, name.rfind(':'));
const int numNodes = net.node_size(); const int numNodes = net.node_size();
for (int i = 0; i < numNodes; ++i) for (int i = 0; i < numNodes; ++i)
{ {
if (net.node(i).name() == name) if (net.node(i).name() == name)
return net.node(i); return net.node(i);
} }
CV_Error(Error::StsParseError, "Input node with name " + name + " not found"); CV_ErrorNoReturn(Error::StsParseError, "Input node with name " + name + " not found");
return net.node(0); // just return something
} }
// Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused. // Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
@@ -400,6 +402,23 @@ private:
     int numOutDims;
 };

+class L2NormalizeSubgraph : public Subgraph
+{
+public:
+    L2NormalizeSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int square = addNodeToMatch("Square", input);
+        int reductionIndices = addNodeToMatch("Const");
+        int sum = addNodeToMatch("Sum", square, reductionIndices);
+        int y = addNodeToMatch("Const");
+        int maximum = addNodeToMatch("Maximum", sum, y);
+        int rsqrt = addNodeToMatch("Rsqrt", maximum);
+        addNodeToMatch("Mul", input, rsqrt);
+        setFusedNode("L2Normalize", input, reductionIndices);
+    }
+};
 void simplifySubgraphs(tensorflow::GraphDef& net)
 {
     std::vector<Ptr<Subgraph> > subgraphs;
@@ -410,6 +429,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
     subgraphs.push_back(Ptr<Subgraph>(new SoftMaxKerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReLU6KerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReshapeKerasSubgraph(3)));
+    subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));

     int numNodes = net.node_size();
     std::vector<int> matchedNodesIds;

@@ -37,7 +37,13 @@ using ::google::protobuf::Reflection;
 namespace
 {

-static int toNCHW[] = {0, 2, 3, 1};
+static int toNCHW(int idx)
+{
+    CV_Assert(-4 <= idx && idx < 4);
+    if (idx == 0) return 0;
+    else if (idx > 0) return idx % 3 + 1;
+    else return (4 + idx) % 3 + 1;
+}
 // This values are used to indicate layer output's data layout where it's possible.
 enum DataLayout
@@ -556,11 +562,23 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
 static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map<String, int>& data_layouts)
 {
+    if (hasLayerAttr(layer, "data_format"))
+    {
+        std::string format = getLayerAttr(layer, "data_format").s();
+        if (format == "NHWC" || format == "channels_last")
+            return DATA_LAYOUT_NHWC;
+        else if (format == "NCHW" || format == "channels_first")
+            return DATA_LAYOUT_NCHW;
+        else
+            CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
+    }
+
+    // Determine layout by layer's inputs
     int layout = DATA_LAYOUT_UNKNOWN;
     std::map<String, int>::const_iterator it;
     for (int i = 0, n = layer.input_size(); i < n; ++i)
     {
-        it = data_layouts.find(layer.input(i));
+        it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':')));
         if (it != data_layouts.end())
         {
             if (it->second == DATA_LAYOUT_UNKNOWN)
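The hunk above is cut off mid-loop, but its shape is clear: look up each input's recorded layout (with any ':N' output index stripped) and fold the results. A hypothetical, self-contained reduction of that logic; foldLayouts and its exact conflict handling are assumptions for illustration, not the importer's code:

    #include <map>
    #include <string>
    #include <vector>

    enum DataLayout { DATA_LAYOUT_NHWC, DATA_LAYOUT_NCHW, DATA_LAYOUT_UNKNOWN };

    // Illustrative only: fold the known layouts of a node's inputs,
    // degrading to UNKNOWN when an input is unknown or the inputs disagree.
    DataLayout foldLayouts(const std::vector<std::string>& inputs,
                           const std::map<std::string, DataLayout>& known)
    {
        DataLayout layout = DATA_LAYOUT_UNKNOWN;
        for (size_t i = 0; i < inputs.size(); ++i)
        {
            std::string name = inputs[i].substr(0, inputs[i].rfind(':'));
            std::map<std::string, DataLayout>::const_iterator it = known.find(name);
            if (it == known.end())
                continue;
            if (it->second == DATA_LAYOUT_UNKNOWN)
                return DATA_LAYOUT_UNKNOWN;
            if (layout == DATA_LAYOUT_UNKNOWN)
                layout = it->second;
            else if (layout != it->second)
                return DATA_LAYOUT_UNKNOWN;
        }
        return layout;
    }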
@@ -708,17 +726,7 @@ void TFImporter::populateNet(Net dstNet)
             // one input only
             connect(layer_id, dstNet, parsePin(input), id, 0);

-            if (hasLayerAttr(layer, "data_format"))
-            {
-                std::string format = getLayerAttr(layer, "data_format").s();
-                if (format == "NHWC" || format == "channels_last")
-                    data_layouts[name] = DATA_LAYOUT_NHWC;
-                else if (format == "NCHW" || format == "channels_first")
-                    data_layouts[name] = DATA_LAYOUT_NCHW;
-                else
-                    CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
-            }
-            else
+            if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
                 data_layouts[name] = DATA_LAYOUT_NHWC;
         }
else if (type == "BiasAdd" || type == "Add") else if (type == "BiasAdd" || type == "Add")
@@ -956,7 +964,7 @@ void TFImporter::populateNet(Net dstNet)
         {
             int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
             int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
-            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW[axis] : axis);
+            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis);

             int id = dstNet.addLayer(name, "Concat", layerParams);
             layer_id[name] = id;
@@ -1017,7 +1025,7 @@ void TFImporter::populateNet(Net dstNet)
             // num_split
             // 1st blob is dims tensor
             int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
-            layerParams.set("axis", toNCHW[axis]);
+            layerParams.set("axis", toNCHW(axis));

             int id = dstNet.addLayer(name, "Slice", layerParams);
             layer_id[name] = id;
@@ -1410,9 +1418,26 @@ void TFImporter::populateNet(Net dstNet)
         {
             // op: "L2Normalize"
             // input: "input"
-            CV_Assert(layer.input_size() == 1);
-            layerParams.set("across_spatial", false);
-            layerParams.set("channel_shared", false);
+            // input: "reduction_indices" (axis)
+            CV_Assert(layer.input_size() == 2);
+            Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(reductionIndices.type() == CV_32SC1);
+            const int numAxes = reductionIndices.total();
+            if (data_layouts[name] == DATA_LAYOUT_NHWC)
+                for (int i = 0; i < numAxes; ++i)
+                    reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
+
+            cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
+            for (int i = 1; i < numAxes; ++i)
+            {
+                CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
+                // Axes have the same sign.
+                CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
+            }
+            layerParams.set("start_axis", reductionIndices.at<int>(0));
+            layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
+
             int id = dstNet.addLayer(name, "Normalize", layerParams);
             layer_id[name] = id;
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);

@@ -100,6 +100,8 @@ public:

 TEST_P(DNNTestNetwork, AlexNet)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
     processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
                Size(227, 227), "prob",
                target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_alexnet.yml" :
@@ -108,6 +110,8 @@ TEST_P(DNNTestNetwork, AlexNet)

 TEST_P(DNNTestNetwork, ResNet_50)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
     processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
                Size(224, 224), "prob",
                target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_resnet_50.yml" :
@@ -116,6 +120,8 @@ TEST_P(DNNTestNetwork, ResNet_50)

 TEST_P(DNNTestNetwork, SqueezeNet_v1_1)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
     processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt",
                Size(227, 227), "prob",
                target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_squeezenet_v1_1.yml" :
@@ -124,6 +130,8 @@ TEST_P(DNNTestNetwork, SqueezeNet_v1_1)

 TEST_P(DNNTestNetwork, GoogLeNet)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
     processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
                Size(224, 224), "prob");
 }
@@ -147,7 +155,9 @@ TEST_P(DNNTestNetwork, ENet)

 TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)
 {
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
     Mat sample = imread(findDataFile("dnn/street.png", false));
     Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
@@ -157,7 +167,9 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)

 TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow)
 {
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
     Mat sample = imread(findDataFile("dnn/street.png", false));
     Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
     processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt",
@@ -177,35 +189,45 @@ TEST_P(DNNTestNetwork, SSD_VGG16)

 TEST_P(DNNTestNetwork, OpenPose_pose_coco)
 {
     if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    double l1 = target == DNN_TARGET_OPENCL_FP16 ? 3e-5 : 1e-5;
+    double lInf = target == DNN_TARGET_OPENCL_FP16 ? 3e-3 : 1e-4;
     processNet("dnn/openpose_pose_coco.caffemodel", "dnn/openpose_pose_coco.prototxt",
-               Size(368, 368), "");
+               Size(368, 368), "", "", l1, lInf);
 }

 TEST_P(DNNTestNetwork, OpenPose_pose_mpi)
 {
     if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    double l1 = target == DNN_TARGET_OPENCL_FP16 ? 4e-5 : 1e-5;
+    double lInf = target == DNN_TARGET_OPENCL_FP16 ? 7e-3 : 1e-4;
     processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi.prototxt",
-               Size(368, 368), "");
+               Size(368, 368), "", "", l1, lInf);
 }

 TEST_P(DNNTestNetwork, OpenPose_pose_mpi_faster_4_stages)
 {
     if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    double l1 = target == DNN_TARGET_OPENCL_FP16 ? 5e-5 : 1e-5;
+    double lInf = target == DNN_TARGET_OPENCL_FP16 ? 5e-3 : 1e-4;
     // The same .caffemodel but modified .prototxt
     // See https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/pose/poseParameters.cpp
     processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi_faster_4_stages.prototxt",
-               Size(368, 368), "");
+               Size(368, 368), "", "", l1, lInf);
 }

 TEST_P(DNNTestNetwork, OpenFace)
 {
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
     processNet("dnn/openface_nn4.small2.v1.t7", "", Size(96, 96), "");
 }

 TEST_P(DNNTestNetwork, opencv_face_detector)
 {
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
     Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
     Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
     processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
@@ -214,13 +236,23 @@ TEST_P(DNNTestNetwork, opencv_face_detector)

 TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
 {
-    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU)
+        throw SkipTestException("");
     Mat sample = imread(findDataFile("dnn/street.png", false));
     Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
     processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
                inp, "detection_out");
 }

+TEST_P(DNNTestNetwork, DenseNet_121)
+{
+    if (backend == DNN_BACKEND_HALIDE ||
+        backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
+        throw SkipTestException("");
+    processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "caffe");
+}
+
 const tuple<DNNBackend, DNNTarget> testCases[] = {
 #ifdef HAVE_HALIDE
     tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
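The l1/lInf pairs threaded through processNet above bound two error norms between the tested and reference outputs: the mean absolute difference and the maximum absolute difference. A sketch of that check using only cv::norm; the real assertion lives in the dnn test utilities as normAssert, so this is an approximation of its semantics:

    #include <opencv2/core.hpp>

    void checkTolerances(const cv::Mat& ref, const cv::Mat& out, double l1, double lInf)
    {
        double meanAbs = cv::norm(ref, out, cv::NORM_L1) / ref.total();  // average |diff| per element
        double maxAbs  = cv::norm(ref, out, cv::NORM_INF);               // worst-case |diff|
        CV_Assert(meanAbs <= l1 && maxAbs <= lInf);
    }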
@@ -228,6 +260,8 @@ const tuple<DNNBackend, DNNTarget> testCases[] = {
 #endif
 #ifdef HAVE_INF_ENGINE
     tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_CPU),
+    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL),
+    tuple<DNNBackend, DNNTarget>(DNN_BACKEND_INFERENCE_ENGINE, DNN_TARGET_OPENCL_FP16),
 #endif
     tuple<DNNBackend, DNNTarget>(DNN_BACKEND_DEFAULT, DNN_TARGET_OPENCL)
 };

@@ -866,6 +866,44 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
     normAssert(outDefault, out);
 }

+// 1. Create a .prototxt file with the following network:
+// layer {
+//   type: "Input" name: "data" top: "data"
+//   input_param { shape { dim: 1 dim: 2 dim: 3 } }
+// }
+// layer {
+//   type: "Input" name: "second_input" top: "second_input"
+//   input_param { shape { dim: 1 dim: 2 dim: 3 } }
+// }
+// layer {
+//   type: "Eltwise" name: "output" top: "output"
+//   bottom: "data" bottom: "second_input"
+//   eltwise_param { operation: SUM }
+// }
+//
+// 2. Create a .caffemodel file using Caffe:
+//
+// import caffe
+// net = caffe.Net('/path/to/prototxt', caffe.TEST)
+// net.save('/path/to/caffemodel')
+//
+// 3. Convert using ModelOptimizer.
+TEST(Test_DLDT, two_inputs)
+{
+    Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
+    int inpSize[] = {1, 2, 3};
+    Mat firstInp(3, &inpSize[0], CV_32F);
+    Mat secondInp(3, &inpSize[0], CV_32F);
+    randu(firstInp, -1, 1);
+    randu(secondInp, -1, 1);
+
+    net.setInput(firstInp, "data");
+    net.setInput(secondInp, "second_input");
+
+    Mat out = net.forward();
+    normAssert(out, firstInp + secondInp);
+}
+
 #endif // HAVE_INF_ENGINE

 }} // namespace

@@ -53,7 +53,7 @@ namespace opencv_test {
 using namespace cv::dnn;

 CV_ENUM(DNNBackend, DNN_BACKEND_DEFAULT, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE)
-CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL)
+CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16)

 static testing::internal::ParamGenerator<DNNTarget> availableDnnTargets()
 {

@@ -193,6 +193,13 @@ TEST_P(Test_TensorFlow_layers, reshape)
     runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
 }

+TEST_P(Test_TensorFlow_layers, l2_normalize)
+{
+    int targetId = GetParam();
+    runTensorFlowNet("l2_normalize", targetId);
+    runTensorFlowNet("l2_normalize_3d", targetId);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());

 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;

@@ -229,7 +229,7 @@ OCL_PERF_TEST_P(RemapFixture, Remap,
     OCL_TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);

-    SANITY_CHECK(dst, eps);
+    SANITY_CHECK_NOTHING();
 }

 } } // namespace opencv_test::ocl

@@ -202,8 +202,8 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
 PERF_TEST_P( TestRemap, remap,
              Combine(
-                 Values( TYPICAL_MAT_TYPES ),
-                 Values( szVGA, sz720p, sz1080p ),
+                 Values( CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1 ),
+                 Values( szVGA, sz1080p ),
                  InterType::all(),
                  BorderMode::all(),
                  RemapMode::all()
@@ -231,7 +231,7 @@ PERF_TEST_P( TestRemap, remap,
         remap(source, destination, map_x, map_y, interpolationType, borderMode);
     }

-    SANITY_CHECK(destination, 1);
+    SANITY_CHECK_NOTHING();
 }

 void update_map(const Mat& src, Mat& map_x, Mat& map_y, const int remapMode )

@@ -547,7 +547,7 @@ static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
     size_t src_step = _src.step(), src_offset = _src.offset();
     const size_t tileSizeYmax = wgs / tileSizeX;

-    // workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory
+    // workaround for NVIDIA: 3 channel vector type takes 4*elem_size in local memory
     int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn;

     if (((src_offset % src_step) % esz == 0) &&

@@ -36,19 +36,25 @@ public:
     typedef fixedpoint64 WT;
     CV_ALWAYS_INLINE fixedpoint64() { val = 0; }
     CV_ALWAYS_INLINE fixedpoint64(const int8_t& _val) { val = ((int64_t)_val) << fixedShift; }
+    CV_ALWAYS_INLINE fixedpoint64(const uint8_t& _val) { val = ((int64_t)_val) << fixedShift; }
     CV_ALWAYS_INLINE fixedpoint64(const int16_t& _val) { val = ((int64_t)_val) << fixedShift; }
+    CV_ALWAYS_INLINE fixedpoint64(const uint16_t& _val) { val = ((int64_t)_val) << fixedShift; }
    CV_ALWAYS_INLINE fixedpoint64(const int32_t& _val) { val = ((int64_t)_val) << fixedShift; }
     CV_ALWAYS_INLINE fixedpoint64(const cv::softdouble& _val) { val = cvRound64(_val * cv::softdouble((int64_t)(1LL << fixedShift))); }
     CV_ALWAYS_INLINE fixedpoint64& operator = (const int8_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
+    CV_ALWAYS_INLINE fixedpoint64& operator = (const uint8_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE fixedpoint64& operator = (const int16_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
+    CV_ALWAYS_INLINE fixedpoint64& operator = (const uint16_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE fixedpoint64& operator = (const int32_t& _val) { val = ((int64_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE fixedpoint64& operator = (const cv::softdouble& _val) { val = cvRound64(_val * cv::softdouble((int64_t)(1LL << fixedShift))); return *this; }
     CV_ALWAYS_INLINE fixedpoint64& operator = (const fixedpoint64& _val) { val = _val.val; return *this; }
-    template <typename ET>
-    CV_ALWAYS_INLINE fixedpoint64 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
+    CV_ALWAYS_INLINE fixedpoint64 operator * (const int8_t& val2) const { return operator *(fixedpoint64(val2)); }
+    CV_ALWAYS_INLINE fixedpoint64 operator * (const uint8_t& val2) const { return operator *(fixedpoint64(val2)); }
+    CV_ALWAYS_INLINE fixedpoint64 operator * (const int16_t& val2) const { return operator *(fixedpoint64(val2)); }
+    CV_ALWAYS_INLINE fixedpoint64 operator * (const uint16_t& val2) const { return operator *(fixedpoint64(val2)); }
+    CV_ALWAYS_INLINE fixedpoint64 operator * (const int32_t& val2) const { return operator *(fixedpoint64(val2)); }
     CV_ALWAYS_INLINE fixedpoint64 operator * (const fixedpoint64& val2) const
     {
-        //Assume -0x00000000C0000000 <= val2 <=0x0000000100000000
+        // INT64_MIN <= val <= INT64_MAX, so shifted multiplication result is inside [INT64_MIN, INT64_MAX] range
         uint64_t uval = (uint64_t)((val ^ (val >> 63)) - (val >> 63));
         uint64_t umul = (uint64_t)((val2.val ^ (val2.val >> 63)) - (val2.val >> 63));
         int64_t ressign = (val >> 63) ^ (val2.val >> 63);
@@ -61,6 +67,9 @@ public:
         uint64_t val0_h = (sh2 & 0xFFFFFFFF) + (sh1_0 >> 32) + (sh1_1 >> 32) + (val0_l >> 32);
         val0_l &= 0xFFFFFFFF;

+        if ((sh2 >> 32) || (val0_h >> (ressign ? 32 : 31)))
+            return (ressign ? ~(int64_t)0x7FFFFFFFFFFFFFFF : (int64_t)0x7FFFFFFFFFFFFFFF);
+
         if (ressign)
         {
             val0_l = (~val0_l + 1) & 0xFFFFFFFF;
@@ -68,16 +77,19 @@ public:
         }
         return (int64_t)(val0_h << 32 | val0_l);
     }
-    CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const { return fixedpoint64(val + val2.val); }
-    CV_ALWAYS_INLINE fixedpoint64 operator - (const fixedpoint64& val2) const { return fixedpoint64(val - val2.val); }
-//    CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const
-//    {
-//        int64_t nfrac = (int64_t)frac + val2.frac;
-//        int64_t nval = (int64_t)val + val2.val + nfrac >> 32;
-//        return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
-//    }
+    CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const
+    {
+        int64_t res = val + val2.val;
+        return ((val ^ res) & (val2.val ^ res)) >> 63 ? ~(res & ~0x7FFFFFFFFFFFFFFF) : res;
+    }
+    CV_ALWAYS_INLINE fixedpoint64 operator - (const fixedpoint64& val2) const
+    {
+        int64_t res = val - val2.val;
+        return ((val ^ val2.val) & (val ^ res)) >> 63 ? ~(res & ~0x7FFFFFFFFFFFFFFF) : res;
+    }
     CV_ALWAYS_INLINE fixedpoint64 operator >> (int n) const { return fixedpoint64(val >> n); }
     CV_ALWAYS_INLINE fixedpoint64 operator << (int n) const { return fixedpoint64(val << n); }
+    CV_ALWAYS_INLINE bool operator == (const fixedpoint64& val2) const { return val == val2.val; }
     template <typename ET>
     CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int64_t)fixedround((uint64_t)val) >> fixedShift); }
     CV_ALWAYS_INLINE operator double() const { return (double)val / (1LL << fixedShift); }
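The ((val ^ res) & (val2.val ^ res)) >> 63 expression above is a branch-free signed-overflow check: wrapped addition overflowed exactly when the result's sign differs from the signs of both operands. An isolated sketch of just the detection (the saturated return value is handled separately in the operators):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    bool addOverflows(int64_t a, int64_t b)
    {
        // Wrap via unsigned arithmetic so the addition itself has no UB.
        int64_t res = (int64_t)((uint64_t)a + (uint64_t)b);
        // Sign bit of (a^res)&(b^res) is set iff both operands disagree
        // with the result's sign, i.e. iff the addition overflowed.
        return (((a ^ res) & (b ^ res)) >> 63) != 0;
    }

    int main()
    {
        assert(addOverflows(std::numeric_limits<int64_t>::max(), 1));
        assert(addOverflows(std::numeric_limits<int64_t>::min(), -1));
        assert(!addOverflows(1, 2));
        return 0;
    }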
@@ -108,11 +120,11 @@ public:
     CV_ALWAYS_INLINE ufixedpoint64& operator = (const uint32_t& _val) { val = ((uint64_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE ufixedpoint64& operator = (const cv::softdouble& _val) { val = _val.getSign() ? 0 : (uint64_t)cvRound64(_val * cv::softdouble((int64_t)(1LL << fixedShift))); return *this; }
     CV_ALWAYS_INLINE ufixedpoint64& operator = (const ufixedpoint64& _val) { val = _val.val; return *this; }
-    template <typename ET>
-    CV_ALWAYS_INLINE ufixedpoint64 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
+    CV_ALWAYS_INLINE ufixedpoint64 operator * (const uint8_t& val2) const { return operator *(ufixedpoint64(val2)); }
+    CV_ALWAYS_INLINE ufixedpoint64 operator * (const uint16_t& val2) const { return operator *(ufixedpoint64(val2)); }
+    CV_ALWAYS_INLINE ufixedpoint64 operator * (const uint32_t& val2) const { return operator *(ufixedpoint64(val2)); }
     CV_ALWAYS_INLINE ufixedpoint64 operator * (const ufixedpoint64& val2) const
     {
-        //Assume val2 <=0x0000000100000000, so shifted multiplication result is less than val and therefore than UINT64_MAX
         uint64_t sh0 = fixedround((val & 0xFFFFFFFF) * (val2.val & 0xFFFFFFFF));
         uint64_t sh1_0 = (val >> 32) * (val2.val & 0xFFFFFFFF);
         uint64_t sh1_1 = (val & 0xFFFFFFFF) * (val2.val >> 32);
@@ -121,18 +133,23 @@ public:
         uint64_t val0_h = (sh2 & 0xFFFFFFFF) + (sh1_0 >> 32) + (sh1_1 >> 32) + (val0_l >> 32);
         val0_l &= 0xFFFFFFFF;

+        if ((sh2 >> 32) || (val0_h >> 32))
+            return ((uint64_t)0xFFFFFFFFFFFFFFFF);
+
         return val0_h << 32 | val0_l;
     }
-    CV_ALWAYS_INLINE ufixedpoint64 operator + (const ufixedpoint64& val2) const { return ufixedpoint64(val + val2.val); }
-    CV_ALWAYS_INLINE ufixedpoint64 operator - (const ufixedpoint64& val2) const { return ufixedpoint64(val - val2.val); }
-//    CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const
-//    {
-//        int64_t nfrac = (int64_t)frac + val2.frac;
-//        int64_t nval = (int64_t)val + val2.val + nfrac >> 32;
-//        return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
-//    }
+    CV_ALWAYS_INLINE ufixedpoint64 operator + (const ufixedpoint64& val2) const
+    {
+        uint64_t res = val + val2.val;
+        return (val > res) ? (uint64_t)0xFFFFFFFFFFFFFFFF : res;
+    }
+    CV_ALWAYS_INLINE ufixedpoint64 operator - (const ufixedpoint64& val2) const
+    {
+        return val > val2.val ? (val - val2.val) : 0;
+    }
     CV_ALWAYS_INLINE ufixedpoint64 operator >> (int n) const { return ufixedpoint64(val >> n); }
     CV_ALWAYS_INLINE ufixedpoint64 operator << (int n) const { return ufixedpoint64(val << n); }
+    CV_ALWAYS_INLINE bool operator == (const ufixedpoint64& val2) const { return val == val2.val; }
     template <typename ET>
     CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
     CV_ALWAYS_INLINE operator double() const { return (double)val / (1LL << fixedShift); }
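The operator * bodies above build a 64x64 -> 128-bit product from four 32-bit partial products and then test the high half for saturation. The core schoolbook step, stripped of the fixed-point rounding, as a sketch:

    #include <cstdint>

    // Split each operand into 32-bit halves and accumulate the four
    // partial products; hi:lo is the exact 128-bit product of a and b.
    void mul64x64(uint64_t a, uint64_t b, uint64_t& hi, uint64_t& lo)
    {
        uint64_t a_lo = a & 0xFFFFFFFF, a_hi = a >> 32;
        uint64_t b_lo = b & 0xFFFFFFFF, b_hi = b >> 32;
        uint64_t p0 = a_lo * b_lo;   // contributes to bits 0..63
        uint64_t p1 = a_hi * b_lo;   // contributes to bits 32..95
        uint64_t p2 = a_lo * b_hi;   // contributes to bits 32..95
        uint64_t p3 = a_hi * b_hi;   // contributes to bits 64..127
        uint64_t mid = (p0 >> 32) + (p1 & 0xFFFFFFFF) + (p2 & 0xFFFFFFFF);
        lo = (mid << 32) | (p0 & 0xFFFFFFFF);
        hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
    }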
@@ -163,21 +180,26 @@ public:
     CV_ALWAYS_INLINE fixedpoint32& operator = (const int16_t& _val) { val = ((int32_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE fixedpoint32& operator = (const cv::softdouble& _val) { val = (int32_t)cvRound(_val * cv::softdouble((1 << fixedShift))); return *this; }
     CV_ALWAYS_INLINE fixedpoint32& operator = (const fixedpoint32& _val) { val = _val.val; return *this; }
-    template <typename ET>
-    CV_ALWAYS_INLINE fixedpoint32 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
+    CV_ALWAYS_INLINE fixedpoint32 operator * (const int8_t& val2) const { return cv::saturate_cast<int32_t>((int64_t)val * val2); }
+    CV_ALWAYS_INLINE fixedpoint32 operator * (const uint8_t& val2) const { return cv::saturate_cast<int32_t>((int64_t)val * val2); }
+    CV_ALWAYS_INLINE fixedpoint32 operator * (const int16_t& val2) const { return cv::saturate_cast<int32_t>((int64_t)val * val2); }
     CV_ALWAYS_INLINE fixedpoint64 operator * (const fixedpoint32& val2) const { return (int64_t)val * (int64_t)(val2.val); }
-    CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const { return fixedpoint32(val + val2.val); }
-    CV_ALWAYS_INLINE fixedpoint32 operator - (const fixedpoint32& val2) const { return fixedpoint32(val - val2.val); }
-//    CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const
-//    {
-//        int32_t nfrac = (int32_t)frac + val2.frac;
-//        int32_t nval = (int32_t)val + val2.val + nfrac >> 32;
-//        return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
-//    }
+    CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const
+    {
+        int32_t res = val + val2.val;
+        return ((val ^ res) & (val2.val ^ res)) >> 31 ? ~(res & ~0x7FFFFFFF) : res;
+    }
+    CV_ALWAYS_INLINE fixedpoint32 operator - (const fixedpoint32& val2) const
+    {
+        int32_t res = val - val2.val;
+        return ((val ^ val2.val) & (val ^ res)) >> 31 ? ~(res & ~0x7FFFFFFF) : res;
+    }
     CV_ALWAYS_INLINE fixedpoint32 operator >> (int n) const { return fixedpoint32(val >> n); }
     CV_ALWAYS_INLINE fixedpoint32 operator << (int n) const { return fixedpoint32(val << n); }
+    CV_ALWAYS_INLINE bool operator == (const fixedpoint32& val2) const { return val == val2.val; }
     template <typename ET>
     CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int32_t)fixedround((uint32_t)val) >> fixedShift); }
+    CV_ALWAYS_INLINE operator fixedpoint64() const { return (int64_t)val << (fixedpoint64::fixedShift - fixedShift); }
     CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE bool isZero() { return val == 0; }
@@ -204,21 +226,24 @@ public:
     CV_ALWAYS_INLINE ufixedpoint32& operator = (const uint16_t& _val) { val = ((uint32_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE ufixedpoint32& operator = (const cv::softdouble& _val) { val = _val.getSign() ? 0 : (uint32_t)cvRound(_val * cv::softdouble((1 << fixedShift))); return *this; }
     CV_ALWAYS_INLINE ufixedpoint32& operator = (const ufixedpoint32& _val) { val = _val.val; return *this; }
-    template <typename ET>
-    CV_ALWAYS_INLINE ufixedpoint32 operator * (const ET& val2) const { return val * val2; } // Wrong rounding is possible for floating point types
+    CV_ALWAYS_INLINE ufixedpoint32 operator * (const uint8_t& val2) const { return cv::saturate_cast<uint32_t>((uint64_t)val * val2); }
+    CV_ALWAYS_INLINE ufixedpoint32 operator * (const uint16_t& val2) const { return cv::saturate_cast<uint32_t>((uint64_t)val * val2); }
     CV_ALWAYS_INLINE ufixedpoint64 operator * (const ufixedpoint32& val2) const { return ((uint64_t)val * (uint64_t)(val2.val)); }
-    CV_ALWAYS_INLINE ufixedpoint32 operator + (const ufixedpoint32& val2) const { return ufixedpoint32(val + val2.val); }
-    CV_ALWAYS_INLINE ufixedpoint32 operator - (const ufixedpoint32& val2) const { return ufixedpoint32(val - val2.val); }
-//    CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const
-//    {
-//        int32_t nfrac = (int32_t)frac + val2.frac;
-//        int32_t nval = (int32_t)val + val2.val + nfrac >> 32;
-//        return nval > MAXINT32 ? beConv(MAXINT32, MAXINT32) : beConv((int32_t)(nval), 0);
-//    }
+    CV_ALWAYS_INLINE ufixedpoint32 operator + (const ufixedpoint32& val2) const
+    {
+        uint32_t res = val + val2.val;
+        return (val > res) ? 0xFFFFFFFF : res;
+    }
+    CV_ALWAYS_INLINE ufixedpoint32 operator - (const ufixedpoint32& val2) const
+    {
+        return val > val2.val ? (val - val2.val) : 0;
+    }
     CV_ALWAYS_INLINE ufixedpoint32 operator >> (int n) const { return ufixedpoint32(val >> n); }
     CV_ALWAYS_INLINE ufixedpoint32 operator << (int n) const { return ufixedpoint32(val << n); }
+    CV_ALWAYS_INLINE bool operator == (const ufixedpoint32& val2) const { return val == val2.val; }
     template <typename ET>
     CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
+    CV_ALWAYS_INLINE operator ufixedpoint64() const { return (uint64_t)val << (ufixedpoint64::fixedShift - fixedShift); }
     CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE bool isZero() { return val == 0; }
@@ -239,20 +264,28 @@ public:
     typedef fixedpoint32 WT;
     CV_ALWAYS_INLINE fixedpoint16() { val = 0; }
     CV_ALWAYS_INLINE fixedpoint16(const int8_t& _val) { val = ((int16_t)_val) << fixedShift; }
+    CV_ALWAYS_INLINE fixedpoint16(const uint8_t& _val) { val = ((int16_t)_val) << fixedShift; }
     CV_ALWAYS_INLINE fixedpoint16(const cv::softdouble& _val) { val = (int16_t)cvRound(_val * cv::softdouble((1 << fixedShift))); }
     CV_ALWAYS_INLINE fixedpoint16& operator = (const int8_t& _val) { val = ((int16_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE fixedpoint16& operator = (const cv::softdouble& _val) { val = (int16_t)cvRound(_val * cv::softdouble((1 << fixedShift))); return *this; }
     CV_ALWAYS_INLINE fixedpoint16& operator = (const fixedpoint16& _val) { val = _val.val; return *this; }
-    template <typename ET>
-    CV_ALWAYS_INLINE fixedpoint16 operator * (const ET& val2) const { return (int16_t)(val * val2); } // Wrong rounding is possible for floating point types
+    CV_ALWAYS_INLINE fixedpoint16 operator * (const int8_t& val2) const { return cv::saturate_cast<int16_t>((int32_t)val * val2); }
     CV_ALWAYS_INLINE fixedpoint32 operator * (const fixedpoint16& val2) const { return (int32_t)val * (int32_t)(val2.val); }
-    CV_ALWAYS_INLINE fixedpoint16 operator + (const fixedpoint16& val2) const { return fixedpoint16((int16_t)(val + val2.val)); }
-    CV_ALWAYS_INLINE fixedpoint16 operator - (const fixedpoint16& val2) const { return fixedpoint16((int16_t)(val - val2.val)); }
+    CV_ALWAYS_INLINE fixedpoint16 operator + (const fixedpoint16& val2) const
+    {
+        int16_t res = val + val2.val;
+        return ((val ^ res) & (val2.val ^ res)) >> 15 ? (int16_t)(~(res & ~0x7FFF)) : res;
+    }
+    CV_ALWAYS_INLINE fixedpoint16 operator - (const fixedpoint16& val2) const
+    {
+        int16_t res = val - val2.val;
+        return ((val ^ val2.val) & (val ^ res)) >> 15 ? (int16_t)(~(res & ~(int16_t)0x7FFF)) : res;
+    }
     CV_ALWAYS_INLINE fixedpoint16 operator >> (int n) const { return fixedpoint16((int16_t)(val >> n)); }
     CV_ALWAYS_INLINE fixedpoint16 operator << (int n) const { return fixedpoint16((int16_t)(val << n)); }
+    CV_ALWAYS_INLINE bool operator == (const fixedpoint16& val2) const { return val == val2.val; }
     template <typename ET>
     CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int16_t)fixedround((uint16_t)val) >> fixedShift); }
+    CV_ALWAYS_INLINE operator fixedpoint32() const { return (int32_t)val << (fixedpoint32::fixedShift - fixedShift); }
     CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE bool isZero() { return val == 0; }
@@ -276,15 +309,23 @@ public:
     CV_ALWAYS_INLINE ufixedpoint16& operator = (const uint8_t& _val) { val = ((uint16_t)_val) << fixedShift; return *this; }
     CV_ALWAYS_INLINE ufixedpoint16& operator = (const cv::softdouble& _val) { val = _val.getSign() ? 0 : (uint16_t)cvRound(_val * cv::softdouble((int32_t)(1 << fixedShift))); return *this; }
     CV_ALWAYS_INLINE ufixedpoint16& operator = (const ufixedpoint16& _val) { val = _val.val; return *this; }
-    template <typename ET>
-    CV_ALWAYS_INLINE ufixedpoint16 operator * (const ET& val2) const { return (uint16_t)(val * val2); } // Wrong rounding is possible for floating point types
+    CV_ALWAYS_INLINE ufixedpoint16 operator * (const uint8_t& val2) const { return cv::saturate_cast<uint16_t>((uint32_t)val * val2); }
     CV_ALWAYS_INLINE ufixedpoint32 operator * (const ufixedpoint16& val2) const { return ((uint32_t)val * (uint32_t)(val2.val)); }
-    CV_ALWAYS_INLINE ufixedpoint16 operator + (const ufixedpoint16& val2) const { return ufixedpoint16((uint16_t)(val + val2.val)); }
-    CV_ALWAYS_INLINE ufixedpoint16 operator - (const ufixedpoint16& val2) const { return ufixedpoint16((uint16_t)(val - val2.val)); }
+    CV_ALWAYS_INLINE ufixedpoint16 operator + (const ufixedpoint16& val2) const
+    {
+        uint16_t res = val + val2.val;
+        return (val > res) ? (uint16_t)0xFFFF : res;
+    }
+    CV_ALWAYS_INLINE ufixedpoint16 operator - (const ufixedpoint16& val2) const
+    {
+        return val > val2.val ? (uint16_t)(val - val2.val) : (uint16_t)0;
+    }
     CV_ALWAYS_INLINE ufixedpoint16 operator >> (int n) const { return ufixedpoint16((uint16_t)(val >> n)); }
     CV_ALWAYS_INLINE ufixedpoint16 operator << (int n) const { return ufixedpoint16((uint16_t)(val << n)); }
+    CV_ALWAYS_INLINE bool operator == (const ufixedpoint16& val2) const { return val == val2.val; }
     template <typename ET>
     CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
+    CV_ALWAYS_INLINE operator ufixedpoint32() const { return (uint32_t)val << (ufixedpoint32::fixedShift - fixedShift); }
     CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
     CV_ALWAYS_INLINE bool isZero() { return val == 0; }
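For the unsigned variants above the overflow tests are simpler: wrapped addition overflowed iff the result is smaller than an operand, and subtraction clamps at zero. A 16-bit sketch mirroring the ufixedpoint operators:

    #include <cassert>
    #include <cstdint>

    uint16_t addSat(uint16_t a, uint16_t b)
    {
        uint16_t r = (uint16_t)(a + b);   // wraps modulo 2^16
        return r < a ? (uint16_t)0xFFFF : r;  // wrap implies overflow
    }

    uint16_t subSat(uint16_t a, uint16_t b)
    {
        return a > b ? (uint16_t)(a - b) : (uint16_t)0;  // clamp at zero
    }

    int main()
    {
        assert(addSat(0xFFF0, 0x20) == 0xFFFF);
        assert(subSat(3, 5) == 0);
        return 0;
    }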

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@@ -77,6 +77,7 @@ protected:
     virtual void run_func() = 0;
     virtual void run_reference_func() = 0;
+    virtual float get_success_error_level(int _interpolation, int _depth) const;
     virtual void validate_results() const;
     virtual void prepare_test_data_for_reference_func();
@@ -229,6 +230,20 @@ void CV_ImageWarpBaseTest::run(int)
     ts->set_gtest_status();
 }

+float CV_ImageWarpBaseTest::get_success_error_level(int _interpolation, int) const
+{
+    if (_interpolation == INTER_CUBIC)
+        return 1.0f;
+    else if (_interpolation == INTER_LANCZOS4)
+        return 1.0f;
+    else if (_interpolation == INTER_NEAREST)
+        return 1.0f;
+    else if (_interpolation == INTER_AREA)
+        return 2.0f;
+    else
+        return 1.0f;
+}
+
 void CV_ImageWarpBaseTest::validate_results() const
 {
     Mat _dst;
@@ -237,15 +252,7 @@ void CV_ImageWarpBaseTest::validate_results() const
     Size dsize = dst.size(), ssize = src.size();
     int cn = _dst.channels();
     dsize.width *= cn;
-    float t = 1.0f;
-    if (interpolation == INTER_CUBIC)
-        t = 1.0f;
-    else if (interpolation == INTER_LANCZOS4)
-        t = 1.0f;
-    else if (interpolation == INTER_NEAREST)
-        t = 1.0f;
-    else if (interpolation == INTER_AREA)
-        t = 2.0f;
+    float t = get_success_error_level(interpolation & INTER_MAX, dst.depth());

     for (int dy = 0; dy < dsize.height; ++dy)
     {
@@ -1034,7 +1041,7 @@ public:
 protected:
     virtual void generate_test_data();
-    virtual void prepare_test_data_for_reference_func();
+    virtual float get_success_error_level(int _interpolation, int _depth) const;

     virtual void run_func();
     virtual void run_reference_func();
@@ -1083,16 +1090,16 @@ void CV_WarpAffine_Test::run_func()
     cv::warpAffine(src, dst, M, dst.size(), interpolation, borderType, borderValue);
 }

-void CV_WarpAffine_Test::prepare_test_data_for_reference_func()
+float CV_WarpAffine_Test::get_success_error_level(int _interpolation, int _depth) const
 {
-    CV_ImageWarpBaseTest::prepare_test_data_for_reference_func();
+    return _depth == CV_8U ? 0 : CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth);
 }

 void CV_WarpAffine_Test::run_reference_func()
 {
-    prepare_test_data_for_reference_func();
-
-    warpAffine(src, reference_dst);
+    Mat tmp = Mat::zeros(dst.size(), dst.type());
+    warpAffine(src, tmp);
+    tmp.convertTo(reference_dst, reference_dst.depth());
 }

 void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
@@ -1123,7 +1130,7 @@ void CV_WarpAffine_Test::warpAffine(const Mat& _src, Mat& _dst)
     const int AB_SCALE = 1 << AB_BITS;
     int round_delta = (inter == INTER_NEAREST) ? AB_SCALE / 2 : (AB_SCALE / INTER_TAB_SIZE / 2);

-    const double* data_tM = tM.ptr<double>(0);
+    const softdouble* data_tM = tM.ptr<softdouble>(0);
     for (int dy = 0; dy < dsize.height; ++dy)
     {
         short* yM = mapx.ptr<short>(dy);
@@ -1162,6 +1169,7 @@ public:
 protected:
     virtual void generate_test_data();
+    virtual float get_success_error_level(int _interpolation, int _depth) const;

     virtual void run_func();
     virtual void run_reference_func();
@@ -1204,11 +1212,16 @@ void CV_WarpPerspective_Test::run_func()
     cv::warpPerspective(src, dst, M, dst.size(), interpolation, borderType, borderValue);
 }

-void CV_WarpPerspective_Test::run_reference_func()
+float CV_WarpPerspective_Test::get_success_error_level(int _interpolation, int _depth) const
 {
-    prepare_test_data_for_reference_func();
-
-    warpPerspective(src, reference_dst);
+    return CV_ImageWarpBaseTest::get_success_error_level(_interpolation, _depth);
+}
+
+void CV_WarpPerspective_Test::run_reference_func()
+{
+    Mat tmp = Mat::zeros(dst.size(), dst.type());
+    warpPerspective(src, tmp);
+    tmp.convertTo(reference_dst, reference_dst.depth());
 }

 void CV_WarpPerspective_Test::warpPerspective(const Mat& _src, Mat& _dst)

@@ -482,6 +482,7 @@ node {
   name: "conv4_3_norm/l2_normalize"
   op: "L2Normalize"
   input: "Relu_4:0"
+  input: "conv4_3_norm/l2_normalize/Sum/reduction_indices"
 }
 node {
   name: "conv4_3_norm/mul_1"
