// opencv/modules/core/test/test_intrin.cpp

#include "test_precomp.hpp"
#include <climits>

#include "test_intrin_utils.hpp"

#define CV_CPU_SIMD_FILENAME "test_intrin_utils.hpp"
#define CV_CPU_DISPATCH_MODE FP16
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"


using namespace cv;

namespace cvtest { namespace hal {
using namespace CV_CPU_OPTIMIZATION_NAMESPACE;

//=============  8-bit integer =====================================================================

// Drives the v_uint8x16 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, uint8x16)
{
    TheTest<v_uint8x16>()
        // load/store, interleave, expand
        .test_loadstore().test_interleave()
        .test_expand().test_expand_q()
        // add/sub, compare, logic, min/max, absdiff
        .test_addsub().test_addsub_wrap()
        .test_cmp().test_logic()
        .test_min_max().test_absdiff()
        // mask and popcount
        .test_mask().test_popcount()
        // pack / pack_u with template arguments 1, 2, 3, 8, then unpack
        .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
        .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
        .test_unpack()
        // extract with template arguments 0, 1, 8, 15
        .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
        ;
}

// Drives the v_int8x16 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, int8x16)
{
    TheTest<v_int8x16>()
        // load/store, interleave, expand
        .test_loadstore().test_interleave()
        .test_expand().test_expand_q()
        // add/sub, compare, logic, min/max, absdiff, abs
        .test_addsub().test_addsub_wrap()
        .test_cmp().test_logic()
        .test_min_max().test_absdiff().test_abs()
        // mask and popcount
        .test_mask().test_popcount()
        // pack with template arguments 1, 2, 3, 8, then unpack
        .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
        .test_unpack()
        // extract with template arguments 0, 1, 8, 15
        .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
        ;
}

//============= 16-bit integer =====================================================================

// Drives the v_uint16x8 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, uint16x8)
{
    TheTest<v_uint16x8>()
        // load/store, interleave, expand
        .test_loadstore().test_interleave().test_expand()
        // add/sub, mul, compare
        .test_addsub().test_addsub_wrap()
        .test_mul().test_mul_expand()
        .test_cmp()
        // shift with template arguments 1 and 8
        .test_shift<1>().test_shift<8>()
        // logic, min/max, absdiff, reduce
        .test_logic().test_min_max()
        .test_absdiff().test_reduce()
        // mask and popcount
        .test_mask().test_popcount()
        // pack / pack_u with template arguments 1, 2, 7, 16, then unpack
        .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
        .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
        .test_unpack()
        // extract with template arguments 0, 1, 4, 7
        .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
        ;
}

// Drives the v_int16x8 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, int16x8)
{
    TheTest<v_int16x8>()
        // load/store, interleave, expand
        .test_loadstore().test_interleave().test_expand()
        // add/sub, mul, compare
        .test_addsub().test_addsub_wrap()
        .test_mul().test_mul_expand()
        .test_cmp()
        // shift with template arguments 1 and 8, then dot product
        .test_shift<1>().test_shift<8>()
        .test_dot_prod()
        // logic, min/max, absdiff, abs, reduce
        .test_logic().test_min_max()
        .test_absdiff().test_abs()
        .test_reduce()
        // mask and popcount
        .test_mask().test_popcount()
        // pack with template arguments 1, 2, 7, 16, then unpack
        .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
        .test_unpack()
        // extract with template arguments 0, 1, 4, 7
        .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
        ;
}

//============= 32-bit integer =====================================================================

// Drives the v_uint32x4 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, uint32x4)
{
    TheTest<v_uint32x4>()
        // load/store, interleave, expand
        .test_loadstore().test_interleave().test_expand()
        // add/sub, mul, compare
        .test_addsub()
        .test_mul().test_mul_expand()
        .test_cmp()
        // shift with template arguments 1 and 8
        .test_shift<1>().test_shift<8>()
        // logic, min/max, absdiff, reduce
        .test_logic().test_min_max()
        .test_absdiff().test_reduce()
        // mask and popcount
        .test_mask().test_popcount()
        // pack with template arguments 1, 2, 15, 32, then unpack
        .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
        .test_unpack()
        // extract with template arguments 0, 1, 2, 3
        .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
        // transpose
        .test_transpose()
        ;
}

// Drives the v_int32x4 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, int32x4)
{
    TheTest<v_int32x4>()
        // load/store, interleave, expand
        .test_loadstore().test_interleave().test_expand()
        // add/sub, mul, abs, compare, popcount
        .test_addsub().test_mul().test_abs()
        .test_cmp().test_popcount()
        // shift with template arguments 1 and 8
        .test_shift<1>()
        .test_shift<8>()
        // logic, min/max, absdiff, reduce, mask
        .test_logic().test_min_max().test_absdiff()
        .test_reduce().test_mask()
        // pack with template arguments 1, 2, 15, 32, then unpack
        .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
        .test_unpack()
        // extract with template arguments 0, 1, 2, 3
        .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
        // float_cvt32 / float_cvt64, then transpose
        .test_float_cvt32().test_float_cvt64()
        .test_transpose()
        ;
}

//============= 64-bit integer =====================================================================

// Drives the v_uint64x2 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, uint64x2)
{
    TheTest<v_uint64x2>()
        // load/store and add/sub
        .test_loadstore().test_addsub()
        // shift with template arguments 1 and 8
        .test_shift<1>()
        .test_shift<8>()
        // logic
        .test_logic()
        // extract with template arguments 0 and 1
        .test_extract<0>()
        .test_extract<1>()
        ;
}

// Drives the v_int64x2 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, int64x2)
{
    TheTest<v_int64x2>()
        // load/store and add/sub
        .test_loadstore().test_addsub()
        // shift with template arguments 1 and 8
        .test_shift<1>()
        .test_shift<8>()
        // logic
        .test_logic()
        // extract with template arguments 0 and 1
        .test_extract<0>()
        .test_extract<1>()
        ;
}

//============= Floating point =====================================================================

// Drives the v_float32x4 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, float32x4)
{
    TheTest<v_float32x4>()
        // load/store and interleave (including the 2-channel variant)
        .test_loadstore()
        .test_interleave().test_interleave_2channel()
        // add/sub, mul, div, compare
        .test_addsub().test_mul().test_div()
        .test_cmp()
        // sqrt/abs, min/max, float absdiff, reduce
        .test_sqrt_abs().test_min_max()
        .test_float_absdiff().test_reduce()
        // mask and unpack
        .test_mask().test_unpack()
        // float_math and float_cvt64
        .test_float_math().test_float_cvt64()
        // matmul, transpose, reduce_sum4
        .test_matmul().test_transpose()
        .test_reduce_sum4()
        ;
}

#if CV_SIMD128_64F
// Drives the v_float64x2 instantiation of TheTest through its chained checks.
// The checks are chained on one object, so they run in the order listed.
// TheTest and its test_* members are not defined in this file
// (presumably they come from test_intrin_utils.hpp).
TEST(hal_intrin, float64x2)
{
    TheTest<v_float64x2>()
        // load/store
        .test_loadstore()
        // add/sub, mul, div, compare
        .test_addsub().test_mul().test_div()
        .test_cmp()
        // sqrt/abs, min/max, float absdiff
        .test_sqrt_abs().test_min_max()
        .test_float_absdiff()
        // mask and unpack
        .test_mask().test_unpack()
        // float_math and float_cvt32
        .test_float_math().test_float_cvt32()
        ;
}
#endif

// FP16 test entry point: hands test_hal_intrin_float16x4 (with an empty
// argument list) to the CV_CPU_CALL_FP16 macro, then throws SkipTestException.
TEST(hal_intrin,float16x4)
{
    // NOTE(review): CV_CPU_CALL_FP16 is defined outside this file. Presumably it
    // calls the FP16 build of test_hal_intrin_float16x4 and returns when FP16 is
    // usable at run time; confirm against the CPU dispatch headers.
    CV_CPU_CALL_FP16(test_hal_intrin_float16x4, ());
    // Executed only if the macro above did not leave the function: raise
    // SkipTestException with the "FP16 is not available" message.
    throw SkipTestException("Unsupported hardware: FP16 is not available");
}

}}
use universal intrinsic for FP16 * use v_float16x4 (universal intrinsic) instead of raw SSE/NEON implementation * define v_load_f16/v_store_f16 since v_load can't be distinguished when short pointer passed * brush up implementation on old compiler (guard correctly) * add test for v_load_f16 and round trip conversion of v_float16x4 * fix conversion error 9 years ago			`#include "test_precomp.hpp"`
HAL universal intrinsics tests and documentation 10 years ago			`#include <climits>`

test(hal): properly dispatch FP16 test 8 years ago			`#include "test_intrin_utils.hpp"`

			`#define CV_CPU_SIMD_FILENAME "test_intrin_utils.hpp"`
			`#define CV_CPU_DISPATCH_MODE FP16`
			`#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"`


HAL universal intrinsics tests and documentation 10 years ago			`using namespace cv;`

fix hal_intrin test on 64bit ARM * fix issue 6521 * use correct comparison 9 years ago			`namespace cvtest { namespace hal {`
test(hal): properly dispatch FP16 test 8 years ago			`using namespace CV_CPU_OPTIMIZATION_NAMESPACE;`
fix hal_intrin test on 64bit ARM * fix issue 6521 * use correct comparison 9 years ago
HAL universal intrinsics tests and documentation 10 years ago			`//============= 8-bit integer =====================================================================`

			`TEST(hal_intrin, uint8x16) {`
			`TheTest<v_uint8x16>()`
			`.test_loadstore()`
			`.test_interleave()`
			`.test_expand()`
			`.test_expand_q()`
			`.test_addsub()`
			`.test_addsub_wrap()`
			`.test_cmp()`
			`.test_logic()`
			`.test_min_max()`
			`.test_absdiff()`
			`.test_mask()`
use universal intrinsic and SSE4 popcount instruction in normHamming - add v_popcount in universal intrinsic - add test for v_popcount - add wrapper of popcount for both MSVC and GCC 8 years ago			`.test_popcount()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()`
			`.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()`
			`.test_unpack()`
			`.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()`
			`;`
			`}`

			`TEST(hal_intrin, int8x16) {`
			`TheTest<v_int8x16>()`
			`.test_loadstore()`
			`.test_interleave()`
			`.test_expand()`
			`.test_expand_q()`
			`.test_addsub()`
			`.test_addsub_wrap()`
			`.test_cmp()`
			`.test_logic()`
			`.test_min_max()`
			`.test_absdiff()`
use universal intrinsic in canny * add v_abs for universal intrinsic * add test of v_abs in test_intrin * fix compile error on gcc * fix bool OR operation 9 years ago			`.test_abs()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_mask()`
use universal intrinsic and SSE4 popcount instruction in normHamming - add v_popcount in universal intrinsic - add test for v_popcount - add wrapper of popcount for both MSVC and GCC 8 years ago			`.test_popcount()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()`
			`.test_unpack()`
			`.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()`
			`;`
			`}`

			`//============= 16-bit integer =====================================================================`

			`TEST(hal_intrin, uint16x8) {`
			`TheTest<v_uint16x8>()`
			`.test_loadstore()`
			`.test_interleave()`
			`.test_expand()`
			`.test_addsub()`
			`.test_addsub_wrap()`
			`.test_mul()`
			`.test_mul_expand()`
			`.test_cmp()`
			`.test_shift<1>()`
			`.test_shift<8>()`
			`.test_logic()`
			`.test_min_max()`
			`.test_absdiff()`
add universal intrinsic in StereoSGBM * add 8 elements version of reduce operation * add tests for new universal intrinsic 8 years ago			`.test_reduce()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_mask()`
use universal intrinsic and SSE4 popcount instruction in normHamming - add v_popcount in universal intrinsic - add test for v_popcount - add wrapper of popcount for both MSVC and GCC 8 years ago			`.test_popcount()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()`
			`.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()`
			`.test_unpack()`
			`.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()`
			`;`
			`}`

			`TEST(hal_intrin, int16x8) {`
			`TheTest<v_int16x8>()`
			`.test_loadstore()`
			`.test_interleave()`
			`.test_expand()`
			`.test_addsub()`
			`.test_addsub_wrap()`
			`.test_mul()`
			`.test_mul_expand()`
			`.test_cmp()`
			`.test_shift<1>()`
			`.test_shift<8>()`
			`.test_dot_prod()`
			`.test_logic()`
			`.test_min_max()`
			`.test_absdiff()`
use universal intrinsic in canny * add v_abs for universal intrinsic * add test of v_abs in test_intrin * fix compile error on gcc * fix bool OR operation 9 years ago			`.test_abs()`
add universal intrinsic in StereoSGBM * add 8 elements version of reduce operation * add tests for new universal intrinsic 8 years ago			`.test_reduce()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_mask()`
use universal intrinsic and SSE4 popcount instruction in normHamming - add v_popcount in universal intrinsic - add test for v_popcount - add wrapper of popcount for both MSVC and GCC 8 years ago			`.test_popcount()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()`
			`.test_unpack()`
			`.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()`
			`;`
			`}`

			`//============= 32-bit integer =====================================================================`

			`TEST(hal_intrin, uint32x4) {`
			`TheTest<v_uint32x4>()`
			`.test_loadstore()`
			`.test_interleave()`
			`.test_expand()`
			`.test_addsub()`
			`.test_mul()`
			`.test_mul_expand()`
			`.test_cmp()`
			`.test_shift<1>()`
			`.test_shift<8>()`
			`.test_logic()`
			`.test_min_max()`
			`.test_absdiff()`
			`.test_reduce()`
			`.test_mask()`
use universal intrinsic and SSE4 popcount instruction in normHamming - add v_popcount in universal intrinsic - add test for v_popcount - add wrapper of popcount for both MSVC and GCC 8 years ago			`.test_popcount()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()`
			`.test_unpack()`
			`.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()`
			`.test_transpose()`
			`;`
			`}`

			`TEST(hal_intrin, int32x4) {`
			`TheTest<v_int32x4>()`
			`.test_loadstore()`
			`.test_interleave()`
			`.test_expand()`
			`.test_addsub()`
			`.test_mul()`
use universal intrinsic in canny * add v_abs for universal intrinsic * add test of v_abs in test_intrin * fix compile error on gcc * fix bool OR operation 9 years ago			`.test_abs()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_cmp()`
use universal intrinsic and SSE4 popcount instruction in normHamming - add v_popcount in universal intrinsic - add test for v_popcount - add wrapper of popcount for both MSVC and GCC 8 years ago			`.test_popcount()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_shift<1>().test_shift<8>()`
			`.test_logic()`
			`.test_min_max()`
			`.test_absdiff()`
			`.test_reduce()`
			`.test_mask()`
			`.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()`
			`.test_unpack()`
			`.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()`
			`.test_float_cvt32()`
			`.test_float_cvt64()`
			`.test_transpose()`
			`;`
			`}`

			`//============= 64-bit integer =====================================================================`

			`TEST(hal_intrin, uint64x2) {`
			`TheTest<v_uint64x2>()`
			`.test_loadstore()`
			`.test_addsub()`
			`.test_shift<1>().test_shift<8>()`
			`.test_logic()`
			`.test_extract<0>().test_extract<1>()`
			`;`
			`}`

			`TEST(hal_intrin, int64x2) {`
			`TheTest<v_int64x2>()`
			`.test_loadstore()`
			`.test_addsub()`
			`.test_shift<1>().test_shift<8>()`
			`.test_logic()`
			`.test_extract<0>().test_extract<1>()`
			`;`
			`}`

			`//============= Floating point =====================================================================`

			`TEST(hal_intrin, float32x4) {`
			`TheTest<v_float32x4>()`
			`.test_loadstore()`
			`.test_interleave()`
2-channel interleaved load/store for universal intrinsics (float only) * Added 2-channel ops to match existing 3-channel and 4-channel ops * v_load_deinterleave() and v_store_interleave() * Implements float32x4 only on SSE (but all types on NEON and CPP) * Includes tests * Will be used to vectorize 2D functions, such as estimateAffine2D() 9 years ago			`.test_interleave_2channel()`
HAL universal intrinsics tests and documentation 10 years ago			`.test_addsub()`
			`.test_mul()`
			`.test_div()`
			`.test_cmp()`
			`.test_sqrt_abs()`
			`.test_min_max()`
			`.test_float_absdiff()`
			`.test_reduce()`
			`.test_mask()`
			`.test_unpack()`
			`.test_float_math()`
			`.test_float_cvt64()`
			`.test_matmul()`
			`.test_transpose()`
build: fix v_reduce_sum4 (requires SSE3) 8 years ago			`.test_reduce_sum4()`
HAL universal intrinsics tests and documentation 10 years ago			`;`
			`}`

			`#if CV_SIMD128_64F`
			`TEST(hal_intrin, float64x2) {`
			`TheTest<v_float64x2>()`
			`.test_loadstore()`
			`.test_addsub()`
			`.test_mul()`
			`.test_div()`
			`.test_cmp()`
			`.test_sqrt_abs()`
			`.test_min_max()`
			`.test_float_absdiff()`
			`.test_mask()`
			`.test_unpack()`
			`.test_float_math()`
			`.test_float_cvt32()`
			`;`
			`}`
			`#endif`
fix hal_intrin test on 64bit ARM * fix issue 6521 * use correct comparison 9 years ago
test(hal): properly dispatch FP16 test 8 years ago			`TEST(hal_intrin,float16x4)`
			`{`
			`CV_CPU_CALL_FP16(test_hal_intrin_float16x4, ());`
			`throw SkipTestException("Unsupported hardware: FP16 is not available");`
			`}`

Extending CPU dispatch to the tests; fixing a typo 8 years ago			`}}`