From 76f7fb5231b1483f9bd3547e370bd59fdd2ff407 Mon Sep 17 00:00:00 2001 From: Boris Fomitchev Date: Sun, 20 Aug 2017 20:31:21 -0700 Subject: [PATCH] Extending CPU dispatch to the tests; fixing a typo --- cmake/OpenCVModule.cmake | 4 + cmake/OpenCVUtils.cmake | 2 +- modules/core/CMakeLists.txt | 8 +- modules/core/test/test_intrin.cpp | 819 +----------------------- modules/core/test/test_intrin.fp16.cpp | 11 + modules/core/test/test_intrin_utils.hpp | 810 +++++++++++++++++++++++ 6 files changed, 831 insertions(+), 823 deletions(-) create mode 100644 modules/core/test/test_intrin.fp16.cpp diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index f560accfbd..6478d11190 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -1047,6 +1047,8 @@ function(ocv_add_perf_tests) set(OPENCV_PERF_${the_module}_SOURCES ${perf_srcs} ${perf_hdrs}) endif() + ocv_compiler_optimization_process_sources(OPENCV_PERF_${the_module}_SOURCES OPENCV_PERF_${the_module}_DEPS ${the_target}) + if(NOT BUILD_opencv_world) get_native_precompiled_header(${the_target} perf_precomp.hpp) endif() @@ -1124,6 +1126,8 @@ function(ocv_add_accuracy_tests) set(OPENCV_TEST_${the_module}_SOURCES ${test_srcs} ${test_hdrs}) endif() + ocv_compiler_optimization_process_sources(OPENCV_TEST_${the_module}_SOURCES OPENCV_TEST_${the_module}_DEPS ${the_target}) + if(NOT BUILD_opencv_world) get_native_precompiled_header(${the_target} test_precomp.hpp) endif() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index a75e171fb3..fb041c7fea 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -387,7 +387,7 @@ macro(ocv_warnings_disable) endif(NOT ENABLE_NOISY_WARNINGS) endmacro() -macro(ocv_append_sourge_file_compile_definitions source) +macro(ocv_append_source_file_compile_definitions source) get_source_file_property(_value "${source}" COMPILE_DEFINITIONS) if(_value) set(_value ${_value} ${ARGN}) diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 4d6b7bbd82..5c73c8ee77 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -38,20 +38,20 @@ ocv_glob_module_sources(SOURCES "${OPENCV_MODULE_opencv_core_BINARY_DIR}/version ocv_module_include_directories(${the_module} ${ZLIB_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS}) if(ANDROID AND HAVE_CPUFEATURES) - ocv_append_sourge_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/system.cpp "HAVE_CPUFEATURES=1") + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/system.cpp "HAVE_CPUFEATURES=1") ocv_module_include_directories(${CPUFEATURES_INCLUDE_DIRS}) endif() if(ITT_INCLUDE_DIRS) ocv_module_include_directories(${ITT_INCLUDE_DIRS}) endif() if(HAVE_POSIX_MEMALIGN) - ocv_append_sourge_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_POSIX_MEMALIGN=1") + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_POSIX_MEMALIGN=1") endif() if(HAVE_MALLOC_H) - ocv_append_sourge_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MALLOC_H=1") + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MALLOC_H=1") endif() if(HAVE_MEMALIGN) - ocv_append_sourge_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MEMALIGN=1") + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MEMALIGN=1") endif() ocv_create_module(${extra_libs}) diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index 2ef4b8c33a..e055edd9f0 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -6,812 +6,6 @@ using namespace cv; namespace cvtest { namespace hal { -template static inline void EXPECT_COMPARE_EQ_(const T a, const T b); -template<> inline void EXPECT_COMPARE_EQ_(const float a, const float b) -{ - EXPECT_FLOAT_EQ( a, b ); -} - -template<> inline void EXPECT_COMPARE_EQ_(const double a, const double b) -{ - EXPECT_DOUBLE_EQ( a, b ); -} - -template struct TheTest -{ - typedef typename R::lane_type LaneType; - - template - static inline void EXPECT_COMPARE_EQ(const T1 a, const T2 b) - { - EXPECT_COMPARE_EQ_((LaneType)a, (LaneType)b); - } - - TheTest & test_loadstore() - { - AlignedData data; - AlignedData out; - - // check if addresses are aligned and unaligned respectively - EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); - EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); - - // check some initialization methods - R r1 = data.a; - R r2 = v_load(data.u.d); - R r3 = v_load_aligned(data.a.d); - R r4(r2); - EXPECT_EQ(data.a[0], r1.get0()); - EXPECT_EQ(data.u[0], r2.get0()); - EXPECT_EQ(data.a[0], r3.get0()); - EXPECT_EQ(data.u[0], r4.get0()); - - // check some store methods - out.u.clear(); - out.a.clear(); - v_store(out.u.d, r1); - v_store_aligned(out.a.d, r2); - EXPECT_EQ(data.a, out.a); - EXPECT_EQ(data.u, out.u); - - // check more store methods - Data d, res(0); - R r5 = d; - v_store_high(res.mid(), r5); - v_store_low(res.d, r5); - EXPECT_EQ(d, res); - - // check halves load correctness - res.clear(); - R r6 = v_load_halves(d.d, d.mid()); - v_store(res.d, r6); - EXPECT_EQ(d, res); - - // zero, all - Data resZ = V_RegTrait128::zero(); - Data resV = V_RegTrait128::all(8); - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ((LaneType)0, resZ[i]); - EXPECT_EQ((LaneType)8, resV[i]); - } - - // reinterpret_as - v_uint8x16 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a); - v_int8x16 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a); - v_uint16x8 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a); - v_int16x8 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a); - v_uint32x4 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a); - v_int32x4 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a); - v_uint64x2 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a); - v_int64x2 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a); - v_float32x4 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a); -#if CV_SIMD128_64F - v_float64x2 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a); -#endif - - return *this; - } - - TheTest & test_interleave() - { - Data data1, data2, data3, data4; - data2 += 20; - data3 += 40; - data4 += 60; - - - R a = data1, b = data2, c = data3; - R d = data1, e = data2, f = data3, g = data4; - - LaneType buf3[R::nlanes * 3]; - LaneType buf4[R::nlanes * 4]; - - v_store_interleave(buf3, a, b, c); - v_store_interleave(buf4, d, e, f, g); - - Data z(0); - a = b = c = d = e = f = g = z; - - v_load_deinterleave(buf3, a, b, c); - v_load_deinterleave(buf4, d, e, f, g); - - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(data1, Data(a)); - EXPECT_EQ(data2, Data(b)); - EXPECT_EQ(data3, Data(c)); - - EXPECT_EQ(data1, Data(d)); - EXPECT_EQ(data2, Data(e)); - EXPECT_EQ(data3, Data(f)); - EXPECT_EQ(data4, Data(g)); - } - - return *this; - } - - // float32x4 only - TheTest & test_interleave_2channel() - { - Data data1, data2; - data2 += 20; - - R a = data1, b = data2; - - LaneType buf2[R::nlanes * 2]; - - v_store_interleave(buf2, a, b); - - Data z(0); - a = b = z; - - v_load_deinterleave(buf2, a, b); - - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(data1, Data(a)); - EXPECT_EQ(data2, Data(b)); - } - - return *this; - } - - // v_expand and v_load_expand - TheTest & test_expand() - { - typedef typename V_RegTrait128::w_reg Rx2; - Data dataA; - R a = dataA; - - Data resB = v_load_expand(dataA.d); - - Rx2 c, d; - v_expand(a, c, d); - - Data resC = c, resD = d; - const int n = Rx2::nlanes; - for (int i = 0; i < n; ++i) - { - EXPECT_EQ(dataA[i], resB[i]); - EXPECT_EQ(dataA[i], resC[i]); - EXPECT_EQ(dataA[i + n], resD[i]); - } - - return *this; - } - - TheTest & test_expand_q() - { - typedef typename V_RegTrait128::q_reg Rx4; - Data data; - Data out = v_load_expand_q(data.d); - const int n = Rx4::nlanes; - for (int i = 0; i < n; ++i) - EXPECT_EQ(data[i], out[i]); - - return *this; - } - - TheTest & test_addsub() - { - Data dataA, dataB; - dataB.reverse(); - R a = dataA, b = dataB; - - Data resC = a + b, resD = a - b; - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(saturate_cast(dataA[i] + dataB[i]), resC[i]); - EXPECT_EQ(saturate_cast(dataA[i] - dataB[i]), resD[i]); - } - - return *this; - } - - TheTest & test_addsub_wrap() - { - Data dataA, dataB; - dataB.reverse(); - R a = dataA, b = dataB; - - Data resC = v_add_wrap(a, b), - resD = v_sub_wrap(a, b); - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]); - EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]); - } - return *this; - } - - TheTest & test_mul() - { - Data dataA, dataB; - dataB.reverse(); - R a = dataA, b = dataB; - - Data resC = a * b; - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(dataA[i] * dataB[i], resC[i]); - } - - return *this; - } - - TheTest & test_div() - { - Data dataA, dataB; - dataB.reverse(); - R a = dataA, b = dataB; - - Data resC = a / b; - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(dataA[i] / dataB[i], resC[i]); - } - - return *this; - } - - TheTest & test_mul_expand() - { - typedef typename V_RegTrait128::w_reg Rx2; - Data dataA, dataB(2); - R a = dataA, b = dataB; - Rx2 c, d; - - v_mul_expand(a, b, c, d); - - Data resC = c, resD = d; - const int n = R::nlanes / 2; - for (int i = 0; i < n; ++i) - { - EXPECT_EQ((typename Rx2::lane_type)dataA[i] * dataB[i], resC[i]); - EXPECT_EQ((typename Rx2::lane_type)dataA[i + n] * dataB[i + n], resD[i]); - } - - return *this; - } - - TheTest & test_abs() - { - typedef typename V_RegTrait128::u_reg Ru; - typedef typename Ru::lane_type u_type; - Data dataA, dataB(10); - R a = dataA, b = dataB; - a = a - b; - - Data resC = v_abs(a); - - for (int i = 0; i < Ru::nlanes; ++i) - { - EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]); - } - - return *this; - } - - template - TheTest & test_shift() - { - Data dataA; - R a = dataA; - - Data resB = a << s, resC = v_shl(a), resD = a >> s, resE = v_shr(a); - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(dataA[i] << s, resB[i]); - EXPECT_EQ(dataA[i] << s, resC[i]); - EXPECT_EQ(dataA[i] >> s, resD[i]); - EXPECT_EQ(dataA[i] >> s, resE[i]); - } - return *this; - } - - TheTest & test_cmp() - { - Data dataA, dataB; - dataB.reverse(); - dataB += 1; - R a = dataA, b = dataB; - - Data resC = (a == b); - Data resD = (a != b); - Data resE = (a > b); - Data resF = (a >= b); - Data resG = (a < b); - Data resH = (a <= b); - - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); - EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); - EXPECT_EQ(dataA[i] > dataB[i], resE[i] != 0); - EXPECT_EQ(dataA[i] >= dataB[i], resF[i] != 0); - EXPECT_EQ(dataA[i] < dataB[i], resG[i] != 0); - EXPECT_EQ(dataA[i] <= dataB[i], resH[i] != 0); - } - return *this; - } - - TheTest & test_dot_prod() - { - typedef typename V_RegTrait128::w_reg Rx2; - Data dataA, dataB(2); - R a = dataA, b = dataB; - - Data res = v_dotprod(a, b); - - const int n = R::nlanes / 2; - for (int i = 0; i < n; ++i) - { - EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], res[i]); - } - return *this; - } - - TheTest & test_logic() - { - Data dataA, dataB(2); - R a = dataA, b = dataB; - - Data resC = a & b, resD = a | b, resE = a ^ b, resF = ~a; - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(dataA[i] & dataB[i], resC[i]); - EXPECT_EQ(dataA[i] | dataB[i], resD[i]); - EXPECT_EQ(dataA[i] ^ dataB[i], resE[i]); - EXPECT_EQ((LaneType)~dataA[i], resF[i]); - } - - return *this; - } - - TheTest & test_sqrt_abs() - { - Data dataA, dataD; - dataD *= -1.0; - R a = dataA, d = dataD; - - Data resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d); - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]); - EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]); - EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]); - } - - return *this; - } - - TheTest & test_min_max() - { - Data dataA, dataB; - dataB.reverse(); - R a = dataA, b = dataB; - - Data resC = v_min(a, b), resD = v_max(a, b); - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]); - EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]); - } - - return *this; - } - - TheTest & test_popcount() - { - static unsigned popcountTable[] = {0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33}; - Data dataA; - R a = dataA; - - unsigned resB = (unsigned)v_reduce_sum(v_popcount(a)); - EXPECT_EQ(popcountTable[R::nlanes], resB); - - return *this; - } - - TheTest & test_absdiff() - { - typedef typename V_RegTrait128::u_reg Ru; - typedef typename Ru::lane_type u_type; - Data dataA(std::numeric_limits::max()), - dataB(std::numeric_limits::min()); - dataA[0] = (LaneType)-1; - dataB[0] = 1; - dataA[1] = 2; - dataB[1] = (LaneType)-2; - R a = dataA, b = dataB; - Data resC = v_absdiff(a, b); - const u_type mask = std::numeric_limits::is_signed ? (u_type)(1 << (sizeof(u_type)*8 - 1)) : 0; - for (int i = 0; i < Ru::nlanes; ++i) - { - u_type uA = dataA[i] ^ mask; - u_type uB = dataB[i] ^ mask; - EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]); - } - return *this; - } - - TheTest & test_float_absdiff() - { - Data dataA(std::numeric_limits::max()), - dataB(std::numeric_limits::min()); - dataA[0] = -1; - dataB[0] = 1; - dataA[1] = 2; - dataB[1] = -2; - R a = dataA, b = dataB; - Data resC = v_absdiff(a, b); - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(dataA[i] > dataB[i] ? dataA[i] - dataB[i] : dataB[i] - dataA[i], resC[i]); - } - return *this; - } - - TheTest & test_reduce() - { - Data dataA; - R a = dataA; - EXPECT_EQ((LaneType)1, v_reduce_min(a)); - EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a)); - EXPECT_EQ((LaneType)((1 + R::nlanes)*R::nlanes/2), v_reduce_sum(a)); - return *this; - } - - TheTest & test_mask() - { - Data dataA, dataB, dataC, dataD(1), dataE(2); - dataA[1] *= (LaneType)-1; - dataC *= (LaneType)-1; - R a = dataA, b = dataB, c = dataC, d = dataD, e = dataE; - - int m = v_signmask(a); - EXPECT_EQ(2, m); - - EXPECT_EQ(false, v_check_all(a)); - EXPECT_EQ(false, v_check_all(b)); - EXPECT_EQ(true, v_check_all(c)); - - EXPECT_EQ(true, v_check_any(a)); - EXPECT_EQ(false, v_check_any(b)); - EXPECT_EQ(true, v_check_any(c)); - - typedef V_TypeTraits Traits; - typedef typename Traits::int_type int_type; - - R f = v_select(b, d, e); - Data resF = f; - for (int i = 0; i < R::nlanes; ++i) - { - int_type m2 = Traits::reinterpret_int(dataB[i]); - EXPECT_EQ((Traits::reinterpret_int(dataD[i]) & m2) - | (Traits::reinterpret_int(dataE[i]) & ~m2), - Traits::reinterpret_int(resF[i])); - } - - return *this; - } - - template - TheTest & test_pack() - { - typedef typename V_RegTrait128::w_reg Rx2; - typedef typename Rx2::lane_type w_type; - Data dataA, dataB; - dataA += std::numeric_limits::is_signed ? -10 : 10; - dataB *= 10; - Rx2 a = dataA, b = dataB; - - Data resC = v_pack(a, b); - Data resD = v_rshr_pack(a, b); - - Data resE(0); - v_pack_store(resE.d, b); - - Data resF(0); - v_rshr_pack_store(resF.d, b); - - const int n = Rx2::nlanes; - const w_type add = (w_type)1 << (s - 1); - for (int i = 0; i < n; ++i) - { - EXPECT_EQ(saturate_cast(dataA[i]), resC[i]); - EXPECT_EQ(saturate_cast(dataB[i]), resC[i + n]); - EXPECT_EQ(saturate_cast((dataA[i] + add) >> s), resD[i]); - EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resD[i + n]); - EXPECT_EQ(saturate_cast(dataB[i]), resE[i]); - EXPECT_EQ((LaneType)0, resE[i + n]); - EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resF[i]); - EXPECT_EQ((LaneType)0, resF[i + n]); - } - return *this; - } - - template - TheTest & test_pack_u() - { - typedef typename V_TypeTraits::w_type LaneType_w; - typedef typename V_RegTrait128::int_reg Ri2; - typedef typename Ri2::lane_type w_type; - - Data dataA, dataB; - dataA += -10; - dataB *= 10; - Ri2 a = dataA, b = dataB; - - Data resC = v_pack_u(a, b); - Data resD = v_rshr_pack_u(a, b); - - Data resE(0); - v_pack_u_store(resE.d, b); - - Data resF(0); - v_rshr_pack_u_store(resF.d, b); - - const int n = Ri2::nlanes; - const w_type add = (w_type)1 << (s - 1); - for (int i = 0; i < n; ++i) - { - EXPECT_EQ(saturate_cast(dataA[i]), resC[i]); - EXPECT_EQ(saturate_cast(dataB[i]), resC[i + n]); - EXPECT_EQ(saturate_cast((dataA[i] + add) >> s), resD[i]); - EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resD[i + n]); - EXPECT_EQ(saturate_cast(dataB[i]), resE[i]); - EXPECT_EQ((LaneType)0, resE[i + n]); - EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resF[i]); - EXPECT_EQ((LaneType)0, resF[i + n]); - } - return *this; - } - - TheTest & test_unpack() - { - Data dataA, dataB; - dataB *= 10; - R a = dataA, b = dataB; - - R c, d, e, f, lo, hi; - v_zip(a, b, c, d); - v_recombine(a, b, e, f); - lo = v_combine_low(a, b); - hi = v_combine_high(a, b); - - Data resC = c, resD = d, resE = e, resF = f, resLo = lo, resHi = hi; - - const int n = R::nlanes/2; - for (int i = 0; i < n; ++i) - { - EXPECT_EQ(dataA[i], resC[i*2]); - EXPECT_EQ(dataB[i], resC[i*2+1]); - EXPECT_EQ(dataA[i+n], resD[i*2]); - EXPECT_EQ(dataB[i+n], resD[i*2+1]); - - EXPECT_EQ(dataA[i], resE[i]); - EXPECT_EQ(dataB[i], resE[i+n]); - EXPECT_EQ(dataA[i+n], resF[i]); - EXPECT_EQ(dataB[i+n], resF[i+n]); - - EXPECT_EQ(dataA[i], resLo[i]); - EXPECT_EQ(dataB[i], resLo[i+n]); - EXPECT_EQ(dataA[i+n], resHi[i]); - EXPECT_EQ(dataB[i+n], resHi[i+n]); - } - - return *this; - } - - template - TheTest & test_extract() - { - Data dataA, dataB; - dataB *= 10; - R a = dataA, b = dataB; - - Data resC = v_extract(a, b); - - for (int i = 0; i < R::nlanes; ++i) - { - if (i + s >= R::nlanes) - EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]); - else - EXPECT_EQ(dataA[i + s], resC[i]); - } - - return *this; - } - - TheTest & test_float_math() - { - typedef typename V_RegTrait128::int_reg Ri; - Data data1, data2, data3; - data1 *= 1.1; - data2 += 10; - R a1 = data1, a2 = data2, a3 = data3; - - Data resB = v_round(a1), - resC = v_trunc(a1), - resD = v_floor(a1), - resE = v_ceil(a1); - - Data resF = v_magnitude(a1, a2), - resG = v_sqr_magnitude(a1, a2), - resH = v_muladd(a1, a2, a3); - - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(cvRound(data1[i]), resB[i]); - EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]); - EXPECT_EQ(cvFloor(data1[i]), resD[i]); - EXPECT_EQ(cvCeil(data1[i]), resE[i]); - - EXPECT_COMPARE_EQ(std::sqrt(data1[i]*data1[i] + data2[i]*data2[i]), resF[i]); - EXPECT_COMPARE_EQ(data1[i]*data1[i] + data2[i]*data2[i], resG[i]); - EXPECT_COMPARE_EQ(data1[i]*data2[i] + data3[i], resH[i]); - } - - return *this; - } - - TheTest & test_float_cvt32() - { - typedef v_float32x4 Rt; - Data dataA; - dataA *= 1.1; - R a = dataA; - Rt b = v_cvt_f32(a); - Data resB = b; - int n = std::min(Rt::nlanes, R::nlanes); - for (int i = 0; i < n; ++i) - { - EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); - } - return *this; - } - - TheTest & test_float_cvt64() - { -#if CV_SIMD128_64F - typedef v_float64x2 Rt; - Data dataA; - dataA *= 1.1; - R a = dataA; - Rt b = v_cvt_f64(a); - Rt c = v_cvt_f64_high(a); - Data resB = b; - Data resC = c; - int n = std::min(Rt::nlanes, R::nlanes); - for (int i = 0; i < n; ++i) - { - EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); - } - for (int i = 0; i < n; ++i) - { - EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]); - } -#endif - return *this; - } - - TheTest & test_matmul() - { - Data dataV, dataA, dataB, dataC, dataD; - dataB.reverse(); - dataC += 2; - dataD *= 0.3; - R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD; - - Data res = v_matmul(v, a, b, c, d); - for (int i = 0; i < R::nlanes; ++i) - { - LaneType val = dataV[0] * dataA[i] - + dataV[1] * dataB[i] - + dataV[2] * dataC[i] - + dataV[3] * dataD[i]; - EXPECT_DOUBLE_EQ(val, res[i]); - } - return *this; - } - - TheTest & test_transpose() - { - Data dataA, dataB, dataC, dataD; - dataB *= 5; - dataC *= 10; - dataD *= 15; - R a = dataA, b = dataB, c = dataC, d = dataD; - R e, f, g, h; - v_transpose4x4(a, b, c, d, - e, f, g, h); - - Data res[4] = {e, f, g, h}; - for (int i = 0; i < R::nlanes; ++i) - { - EXPECT_EQ(dataA[i], res[i][0]); - EXPECT_EQ(dataB[i], res[i][1]); - EXPECT_EQ(dataC[i], res[i][2]); - EXPECT_EQ(dataD[i], res[i][3]); - } - return *this; - } - - TheTest & test_reduce_sum4() - { - R a(0.1f, 0.02f, 0.003f, 0.0004f); - R b(1, 20, 300, 4000); - R c(10, 2, 0.3f, 0.04f); - R d(1, 2, 3, 4); - - R sum = v_reduce_sum4(a, b, c, d); - - Data res = sum; - EXPECT_EQ(0.1234f, res[0]); - EXPECT_EQ(4321.0f, res[1]); - EXPECT_EQ(12.34f, res[2]); - EXPECT_EQ(10.0f, res[3]); - return *this; - } - - TheTest & test_loadstore_fp16() - { -#if CV_FP16 && CV_SIMD128 - AlignedData data; - AlignedData out; - - if(checkHardwareSupport(CV_CPU_FP16)) - { - // check if addresses are aligned and unaligned respectively - EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); - EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); - - // check some initialization methods - R r1 = data.u; - R r2 = v_load_f16(data.a.d); - R r3(r2); - EXPECT_EQ(data.u[0], r1.get0()); - EXPECT_EQ(data.a[0], r2.get0()); - EXPECT_EQ(data.a[0], r3.get0()); - - // check some store methods - out.a.clear(); - v_store_f16(out.a.d, r1); - EXPECT_EQ(data.a, out.a); - } - - return *this; -#endif - } - - TheTest & test_float_cvt_fp16() - { -#if CV_FP16 && CV_SIMD128 - AlignedData data; - - if(checkHardwareSupport(CV_CPU_FP16)) - { - // check conversion - v_float32x4 r1 = v_load(data.a.d); - v_float16x4 r2 = v_cvt_f16(r1); - v_float32x4 r3 = v_cvt_f32(r2); - EXPECT_EQ(0x3c00, r2.get0()); - EXPECT_EQ(r3.get0(), r1.get0()); - } - - return *this; -#endif - } - -}; - - //============= 8-bit integer ===================================================================== TEST(hal_intrin, uint8x16) { @@ -1026,15 +220,4 @@ TEST(hal_intrin, float64x2) { } #endif -#if CV_FP16 && CV_SIMD128 -TEST(hal_intrin, float16x4) { - TheTest() - .test_loadstore_fp16() - .test_float_cvt_fp16() - ; -} -#endif - -}; - -}; +}} diff --git a/modules/core/test/test_intrin.fp16.cpp b/modules/core/test/test_intrin.fp16.cpp new file mode 100644 index 0000000000..4f277642ec --- /dev/null +++ b/modules/core/test/test_intrin.fp16.cpp @@ -0,0 +1,11 @@ +#include "test_precomp.hpp" +#include "test_intrin_utils.hpp" + +namespace cvtest { namespace hal { +TEST(hal_intrin, float16x4) { + TheTest() + .test_loadstore_fp16() + .test_float_cvt_fp16() + ; +} +}} diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 1f8a78d98d..817dad798c 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -5,6 +5,7 @@ #include "opencv2/ts.hpp" #include #include +#include template struct Data; template struct initializer; @@ -155,4 +156,813 @@ template std::ostream & operator<<(std::ostream & out, const Data static inline void EXPECT_COMPARE_EQ_(const T a, const T b); +template<> inline void EXPECT_COMPARE_EQ_(const float a, const float b) +{ + EXPECT_FLOAT_EQ( a, b ); +} + +template<> inline void EXPECT_COMPARE_EQ_(const double a, const double b) +{ + EXPECT_DOUBLE_EQ( a, b ); +} + +template struct TheTest +{ + typedef typename R::lane_type LaneType; + + template + static inline void EXPECT_COMPARE_EQ(const T1 a, const T2 b) + { + EXPECT_COMPARE_EQ_((LaneType)a, (LaneType)b); + } + + TheTest & test_loadstore() + { + AlignedData data; + AlignedData out; + + // check if addresses are aligned and unaligned respectively + EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); + EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); + EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); + EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); + + // check some initialization methods + R r1 = data.a; + R r2 = v_load(data.u.d); + R r3 = v_load_aligned(data.a.d); + R r4(r2); + EXPECT_EQ(data.a[0], r1.get0()); + EXPECT_EQ(data.u[0], r2.get0()); + EXPECT_EQ(data.a[0], r3.get0()); + EXPECT_EQ(data.u[0], r4.get0()); + + // check some store methods + out.u.clear(); + out.a.clear(); + v_store(out.u.d, r1); + v_store_aligned(out.a.d, r2); + EXPECT_EQ(data.a, out.a); + EXPECT_EQ(data.u, out.u); + + // check more store methods + Data d, res(0); + R r5 = d; + v_store_high(res.mid(), r5); + v_store_low(res.d, r5); + EXPECT_EQ(d, res); + + // check halves load correctness + res.clear(); + R r6 = v_load_halves(d.d, d.mid()); + v_store(res.d, r6); + EXPECT_EQ(d, res); + + // zero, all + Data resZ = V_RegTrait128::zero(); + Data resV = V_RegTrait128::all(8); + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ((LaneType)0, resZ[i]); + EXPECT_EQ((LaneType)8, resV[i]); + } + + // reinterpret_as + v_uint8x16 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a); + v_int8x16 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a); + v_uint16x8 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a); + v_int16x8 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a); + v_uint32x4 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a); + v_int32x4 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a); + v_uint64x2 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a); + v_int64x2 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a); + v_float32x4 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a); +#if CV_SIMD128_64F + v_float64x2 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a); +#endif + + return *this; + } + + TheTest & test_interleave() + { + Data data1, data2, data3, data4; + data2 += 20; + data3 += 40; + data4 += 60; + + + R a = data1, b = data2, c = data3; + R d = data1, e = data2, f = data3, g = data4; + + LaneType buf3[R::nlanes * 3]; + LaneType buf4[R::nlanes * 4]; + + v_store_interleave(buf3, a, b, c); + v_store_interleave(buf4, d, e, f, g); + + Data z(0); + a = b = c = d = e = f = g = z; + + v_load_deinterleave(buf3, a, b, c); + v_load_deinterleave(buf4, d, e, f, g); + + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(data1, Data(a)); + EXPECT_EQ(data2, Data(b)); + EXPECT_EQ(data3, Data(c)); + + EXPECT_EQ(data1, Data(d)); + EXPECT_EQ(data2, Data(e)); + EXPECT_EQ(data3, Data(f)); + EXPECT_EQ(data4, Data(g)); + } + + return *this; + } + + // float32x4 only + TheTest & test_interleave_2channel() + { + Data data1, data2; + data2 += 20; + + R a = data1, b = data2; + + LaneType buf2[R::nlanes * 2]; + + v_store_interleave(buf2, a, b); + + Data z(0); + a = b = z; + + v_load_deinterleave(buf2, a, b); + + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(data1, Data(a)); + EXPECT_EQ(data2, Data(b)); + } + + return *this; + } + + // v_expand and v_load_expand + TheTest & test_expand() + { + typedef typename V_RegTrait128::w_reg Rx2; + Data dataA; + R a = dataA; + + Data resB = v_load_expand(dataA.d); + + Rx2 c, d; + v_expand(a, c, d); + + Data resC = c, resD = d; + const int n = Rx2::nlanes; + for (int i = 0; i < n; ++i) + { + EXPECT_EQ(dataA[i], resB[i]); + EXPECT_EQ(dataA[i], resC[i]); + EXPECT_EQ(dataA[i + n], resD[i]); + } + + return *this; + } + + TheTest & test_expand_q() + { + typedef typename V_RegTrait128::q_reg Rx4; + Data data; + Data out = v_load_expand_q(data.d); + const int n = Rx4::nlanes; + for (int i = 0; i < n; ++i) + EXPECT_EQ(data[i], out[i]); + + return *this; + } + + TheTest & test_addsub() + { + Data dataA, dataB; + dataB.reverse(); + R a = dataA, b = dataB; + + Data resC = a + b, resD = a - b; + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(saturate_cast(dataA[i] + dataB[i]), resC[i]); + EXPECT_EQ(saturate_cast(dataA[i] - dataB[i]), resD[i]); + } + + return *this; + } + + TheTest & test_addsub_wrap() + { + Data dataA, dataB; + dataB.reverse(); + R a = dataA, b = dataB; + + Data resC = v_add_wrap(a, b), + resD = v_sub_wrap(a, b); + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ((LaneType)(dataA[i] + dataB[i]), resC[i]); + EXPECT_EQ((LaneType)(dataA[i] - dataB[i]), resD[i]); + } + return *this; + } + + TheTest & test_mul() + { + Data dataA, dataB; + dataB.reverse(); + R a = dataA, b = dataB; + + Data resC = a * b; + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(dataA[i] * dataB[i], resC[i]); + } + + return *this; + } + + TheTest & test_div() + { + Data dataA, dataB; + dataB.reverse(); + R a = dataA, b = dataB; + + Data resC = a / b; + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(dataA[i] / dataB[i], resC[i]); + } + + return *this; + } + + TheTest & test_mul_expand() + { + typedef typename V_RegTrait128::w_reg Rx2; + Data dataA, dataB(2); + R a = dataA, b = dataB; + Rx2 c, d; + + v_mul_expand(a, b, c, d); + + Data resC = c, resD = d; + const int n = R::nlanes / 2; + for (int i = 0; i < n; ++i) + { + EXPECT_EQ((typename Rx2::lane_type)dataA[i] * dataB[i], resC[i]); + EXPECT_EQ((typename Rx2::lane_type)dataA[i + n] * dataB[i + n], resD[i]); + } + + return *this; + } + + TheTest & test_abs() + { + typedef typename V_RegTrait128::u_reg Ru; + typedef typename Ru::lane_type u_type; + Data dataA, dataB(10); + R a = dataA, b = dataB; + a = a - b; + + Data resC = v_abs(a); + + for (int i = 0; i < Ru::nlanes; ++i) + { + EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]); + } + + return *this; + } + + template + TheTest & test_shift() + { + Data dataA; + R a = dataA; + + Data resB = a << s, resC = v_shl(a), resD = a >> s, resE = v_shr(a); + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(dataA[i] << s, resB[i]); + EXPECT_EQ(dataA[i] << s, resC[i]); + EXPECT_EQ(dataA[i] >> s, resD[i]); + EXPECT_EQ(dataA[i] >> s, resE[i]); + } + return *this; + } + + TheTest & test_cmp() + { + Data dataA, dataB; + dataB.reverse(); + dataB += 1; + R a = dataA, b = dataB; + + Data resC = (a == b); + Data resD = (a != b); + Data resE = (a > b); + Data resF = (a >= b); + Data resG = (a < b); + Data resH = (a <= b); + + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); + EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); + EXPECT_EQ(dataA[i] > dataB[i], resE[i] != 0); + EXPECT_EQ(dataA[i] >= dataB[i], resF[i] != 0); + EXPECT_EQ(dataA[i] < dataB[i], resG[i] != 0); + EXPECT_EQ(dataA[i] <= dataB[i], resH[i] != 0); + } + return *this; + } + + TheTest & test_dot_prod() + { + typedef typename V_RegTrait128::w_reg Rx2; + Data dataA, dataB(2); + R a = dataA, b = dataB; + + Data res = v_dotprod(a, b); + + const int n = R::nlanes / 2; + for (int i = 0; i < n; ++i) + { + EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], res[i]); + } + return *this; + } + + TheTest & test_logic() + { + Data dataA, dataB(2); + R a = dataA, b = dataB; + + Data resC = a & b, resD = a | b, resE = a ^ b, resF = ~a; + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(dataA[i] & dataB[i], resC[i]); + EXPECT_EQ(dataA[i] | dataB[i], resD[i]); + EXPECT_EQ(dataA[i] ^ dataB[i], resE[i]); + EXPECT_EQ((LaneType)~dataA[i], resF[i]); + } + + return *this; + } + + TheTest & test_sqrt_abs() + { + Data dataA, dataD; + dataD *= -1.0; + R a = dataA, d = dataD; + + Data resB = v_sqrt(a), resC = v_invsqrt(a), resE = v_abs(d); + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]); + EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]); + EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]); + } + + return *this; + } + + TheTest & test_min_max() + { + Data dataA, dataB; + dataB.reverse(); + R a = dataA, b = dataB; + + Data resC = v_min(a, b), resD = v_max(a, b); + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(std::min(dataA[i], dataB[i]), resC[i]); + EXPECT_EQ(std::max(dataA[i], dataB[i]), resD[i]); + } + + return *this; + } + + TheTest & test_popcount() + { + static unsigned popcountTable[] = {0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33}; + Data dataA; + R a = dataA; + + unsigned resB = (unsigned)v_reduce_sum(v_popcount(a)); + EXPECT_EQ(popcountTable[R::nlanes], resB); + + return *this; + } + + TheTest & test_absdiff() + { + typedef typename V_RegTrait128::u_reg Ru; + typedef typename Ru::lane_type u_type; + Data dataA(std::numeric_limits::max()), + dataB(std::numeric_limits::min()); + dataA[0] = (LaneType)-1; + dataB[0] = 1; + dataA[1] = 2; + dataB[1] = (LaneType)-2; + R a = dataA, b = dataB; + Data resC = v_absdiff(a, b); + const u_type mask = std::numeric_limits::is_signed ? (u_type)(1 << (sizeof(u_type)*8 - 1)) : 0; + for (int i = 0; i < Ru::nlanes; ++i) + { + u_type uA = dataA[i] ^ mask; + u_type uB = dataB[i] ^ mask; + EXPECT_EQ(uA > uB ? uA - uB : uB - uA, resC[i]); + } + return *this; + } + + TheTest & test_float_absdiff() + { + Data dataA(std::numeric_limits::max()), + dataB(std::numeric_limits::min()); + dataA[0] = -1; + dataB[0] = 1; + dataA[1] = 2; + dataB[1] = -2; + R a = dataA, b = dataB; + Data resC = v_absdiff(a, b); + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(dataA[i] > dataB[i] ? dataA[i] - dataB[i] : dataB[i] - dataA[i], resC[i]); + } + return *this; + } + + TheTest & test_reduce() + { + Data dataA; + R a = dataA; + EXPECT_EQ((LaneType)1, v_reduce_min(a)); + EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a)); + EXPECT_EQ((LaneType)((1 + R::nlanes)*R::nlanes/2), v_reduce_sum(a)); + return *this; + } + + TheTest & test_mask() + { + Data dataA, dataB, dataC, dataD(1), dataE(2); + dataA[1] *= (LaneType)-1; + dataC *= (LaneType)-1; + R a = dataA, b = dataB, c = dataC, d = dataD, e = dataE; + + int m = v_signmask(a); + EXPECT_EQ(2, m); + + EXPECT_EQ(false, v_check_all(a)); + EXPECT_EQ(false, v_check_all(b)); + EXPECT_EQ(true, v_check_all(c)); + + EXPECT_EQ(true, v_check_any(a)); + EXPECT_EQ(false, v_check_any(b)); + EXPECT_EQ(true, v_check_any(c)); + + typedef V_TypeTraits Traits; + typedef typename Traits::int_type int_type; + + R f = v_select(b, d, e); + Data resF = f; + for (int i = 0; i < R::nlanes; ++i) + { + int_type m2 = Traits::reinterpret_int(dataB[i]); + EXPECT_EQ((Traits::reinterpret_int(dataD[i]) & m2) + | (Traits::reinterpret_int(dataE[i]) & ~m2), + Traits::reinterpret_int(resF[i])); + } + + return *this; + } + + template + TheTest & test_pack() + { + typedef typename V_RegTrait128::w_reg Rx2; + typedef typename Rx2::lane_type w_type; + Data dataA, dataB; + dataA += std::numeric_limits::is_signed ? -10 : 10; + dataB *= 10; + Rx2 a = dataA, b = dataB; + + Data resC = v_pack(a, b); + Data resD = v_rshr_pack(a, b); + + Data resE(0); + v_pack_store(resE.d, b); + + Data resF(0); + v_rshr_pack_store(resF.d, b); + + const int n = Rx2::nlanes; + const w_type add = (w_type)1 << (s - 1); + for (int i = 0; i < n; ++i) + { + EXPECT_EQ(saturate_cast(dataA[i]), resC[i]); + EXPECT_EQ(saturate_cast(dataB[i]), resC[i + n]); + EXPECT_EQ(saturate_cast((dataA[i] + add) >> s), resD[i]); + EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resD[i + n]); + EXPECT_EQ(saturate_cast(dataB[i]), resE[i]); + EXPECT_EQ((LaneType)0, resE[i + n]); + EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resF[i]); + EXPECT_EQ((LaneType)0, resF[i + n]); + } + return *this; + } + + template + TheTest & test_pack_u() + { + typedef typename V_TypeTraits::w_type LaneType_w; + typedef typename V_RegTrait128::int_reg Ri2; + typedef typename Ri2::lane_type w_type; + + Data dataA, dataB; + dataA += -10; + dataB *= 10; + Ri2 a = dataA, b = dataB; + + Data resC = v_pack_u(a, b); + Data resD = v_rshr_pack_u(a, b); + + Data resE(0); + v_pack_u_store(resE.d, b); + + Data resF(0); + v_rshr_pack_u_store(resF.d, b); + + const int n = Ri2::nlanes; + const w_type add = (w_type)1 << (s - 1); + for (int i = 0; i < n; ++i) + { + EXPECT_EQ(saturate_cast(dataA[i]), resC[i]); + EXPECT_EQ(saturate_cast(dataB[i]), resC[i + n]); + EXPECT_EQ(saturate_cast((dataA[i] + add) >> s), resD[i]); + EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resD[i + n]); + EXPECT_EQ(saturate_cast(dataB[i]), resE[i]); + EXPECT_EQ((LaneType)0, resE[i + n]); + EXPECT_EQ(saturate_cast((dataB[i] + add) >> s), resF[i]); + EXPECT_EQ((LaneType)0, resF[i + n]); + } + return *this; + } + + TheTest & test_unpack() + { + Data dataA, dataB; + dataB *= 10; + R a = dataA, b = dataB; + + R c, d, e, f, lo, hi; + v_zip(a, b, c, d); + v_recombine(a, b, e, f); + lo = v_combine_low(a, b); + hi = v_combine_high(a, b); + + Data resC = c, resD = d, resE = e, resF = f, resLo = lo, resHi = hi; + + const int n = R::nlanes/2; + for (int i = 0; i < n; ++i) + { + EXPECT_EQ(dataA[i], resC[i*2]); + EXPECT_EQ(dataB[i], resC[i*2+1]); + EXPECT_EQ(dataA[i+n], resD[i*2]); + EXPECT_EQ(dataB[i+n], resD[i*2+1]); + + EXPECT_EQ(dataA[i], resE[i]); + EXPECT_EQ(dataB[i], resE[i+n]); + EXPECT_EQ(dataA[i+n], resF[i]); + EXPECT_EQ(dataB[i+n], resF[i+n]); + + EXPECT_EQ(dataA[i], resLo[i]); + EXPECT_EQ(dataB[i], resLo[i+n]); + EXPECT_EQ(dataA[i+n], resHi[i]); + EXPECT_EQ(dataB[i+n], resHi[i+n]); + } + + return *this; + } + + template + TheTest & test_extract() + { + Data dataA, dataB; + dataB *= 10; + R a = dataA, b = dataB; + + Data resC = v_extract(a, b); + + for (int i = 0; i < R::nlanes; ++i) + { + if (i + s >= R::nlanes) + EXPECT_EQ(dataB[i - R::nlanes + s], resC[i]); + else + EXPECT_EQ(dataA[i + s], resC[i]); + } + + return *this; + } + + TheTest & test_float_math() + { + typedef typename V_RegTrait128::int_reg Ri; + Data data1, data2, data3; + data1 *= 1.1; + data2 += 10; + R a1 = data1, a2 = data2, a3 = data3; + + Data resB = v_round(a1), + resC = v_trunc(a1), + resD = v_floor(a1), + resE = v_ceil(a1); + + Data resF = v_magnitude(a1, a2), + resG = v_sqr_magnitude(a1, a2), + resH = v_muladd(a1, a2, a3); + + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(cvRound(data1[i]), resB[i]); + EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]); + EXPECT_EQ(cvFloor(data1[i]), resD[i]); + EXPECT_EQ(cvCeil(data1[i]), resE[i]); + + EXPECT_COMPARE_EQ(std::sqrt(data1[i]*data1[i] + data2[i]*data2[i]), resF[i]); + EXPECT_COMPARE_EQ(data1[i]*data1[i] + data2[i]*data2[i], resG[i]); + EXPECT_COMPARE_EQ(data1[i]*data2[i] + data3[i], resH[i]); + } + + return *this; + } + + TheTest & test_float_cvt32() + { + typedef v_float32x4 Rt; + Data dataA; + dataA *= 1.1; + R a = dataA; + Rt b = v_cvt_f32(a); + Data resB = b; + int n = std::min(Rt::nlanes, R::nlanes); + for (int i = 0; i < n; ++i) + { + EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); + } + return *this; + } + + TheTest & test_float_cvt64() + { +#if CV_SIMD128_64F + typedef v_float64x2 Rt; + Data dataA; + dataA *= 1.1; + R a = dataA; + Rt b = v_cvt_f64(a); + Rt c = v_cvt_f64_high(a); + Data resB = b; + Data resC = c; + int n = std::min(Rt::nlanes, R::nlanes); + for (int i = 0; i < n; ++i) + { + EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]); + } + for (int i = 0; i < n; ++i) + { + EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]); + } +#endif + return *this; + } + + TheTest & test_matmul() + { + Data dataV, dataA, dataB, dataC, dataD; + dataB.reverse(); + dataC += 2; + dataD *= 0.3; + R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD; + + Data res = v_matmul(v, a, b, c, d); + for (int i = 0; i < R::nlanes; ++i) + { + LaneType val = dataV[0] * dataA[i] + + dataV[1] * dataB[i] + + dataV[2] * dataC[i] + + dataV[3] * dataD[i]; + EXPECT_DOUBLE_EQ(val, res[i]); + } + return *this; + } + + TheTest & test_transpose() + { + Data dataA, dataB, dataC, dataD; + dataB *= 5; + dataC *= 10; + dataD *= 15; + R a = dataA, b = dataB, c = dataC, d = dataD; + R e, f, g, h; + v_transpose4x4(a, b, c, d, + e, f, g, h); + + Data res[4] = {e, f, g, h}; + for (int i = 0; i < R::nlanes; ++i) + { + EXPECT_EQ(dataA[i], res[i][0]); + EXPECT_EQ(dataB[i], res[i][1]); + EXPECT_EQ(dataC[i], res[i][2]); + EXPECT_EQ(dataD[i], res[i][3]); + } + return *this; + } + + TheTest & test_reduce_sum4() + { + R a(0.1f, 0.02f, 0.003f, 0.0004f); + R b(1, 20, 300, 4000); + R c(10, 2, 0.3f, 0.04f); + R d(1, 2, 3, 4); + + R sum = v_reduce_sum4(a, b, c, d); + + Data res = sum; + EXPECT_EQ(0.1234f, res[0]); + EXPECT_EQ(4321.0f, res[1]); + EXPECT_EQ(12.34f, res[2]); + EXPECT_EQ(10.0f, res[3]); + return *this; + } + + TheTest & test_loadstore_fp16() + { +#if CV_FP16 && CV_SIMD128 + AlignedData data; + AlignedData out; + + if(1 /* checkHardwareSupport(CV_CPU_FP16) */ ) + { + // check if addresses are aligned and unaligned respectively + EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); + EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); + EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); + EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); + + // check some initialization methods + R r1 = data.u; + R r2 = v_load_f16(data.a.d); + R r3(r2); + EXPECT_EQ(data.u[0], r1.get0()); + EXPECT_EQ(data.a[0], r2.get0()); + EXPECT_EQ(data.a[0], r3.get0()); + + // check some store methods + out.a.clear(); + v_store_f16(out.a.d, r1); + EXPECT_EQ(data.a, out.a); + } + + return *this; +#endif + } + + TheTest & test_float_cvt_fp16() + { +#if CV_FP16 && CV_SIMD128 + AlignedData data; + + if(1 /* checkHardwareSupport(CV_CPU_FP16) */) + { + // check conversion + v_float32x4 r1 = v_load(data.a.d); + v_float16x4 r2 = v_cvt_f16(r1); + v_float32x4 r3 = v_cvt_f32(r2); + EXPECT_EQ(0x3c00, r2.get0()); + EXPECT_EQ(r3.get0(), r1.get0()); + } + + return *this; +#endif + } + +}; + } +} + #endif