diff --git a/CMakeLists.txt b/CMakeLists.txt index dc7b72c4b8..96d2ef8c1c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -318,7 +318,7 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC ) OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC ) OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) -OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF (CV_GCC AND PPC64LE) ) +OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF ((CV_GCC OR CV_CLANG) AND PPC64LE) ) OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) ) OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) ) diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index 9f050f7c21..5b006d12f2 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -1,46 +1,6 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html #ifndef OPENCV_HAL_VSX_HPP #define OPENCV_HAL_VSX_HPP @@ -276,34 +236,38 @@ OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2) OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4) OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2) -#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \ +#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a) \ inline _Tpvec v_load(const _Tp* ptr) \ -{ return _Tpvec(ld_func(0, ptr)); } \ -inline _Tpvec v_load_aligned(const _Tp* ptr) \ -{ return _Tpvec(ld_func(0, ptr)); } \ +{ return _Tpvec(ld(0, ptr)); } \ +inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr)) \ +{ return _Tpvec(ld_a(0, ptr)); } \ inline _Tpvec v_load_low(const _Tp* ptr) \ { return _Tpvec(vec_ld_l8(ptr)); } \ inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \ { return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \ inline void v_store(_Tp* ptr, const _Tpvec& a) \ -{ st_func(a.val, 0, ptr); } \ -inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ -{ st_func(a.val, 0, ptr); } \ +{ st(a.val, 0, ptr); } \ +inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \ +{ st_a(a.val, 0, ptr); } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { vec_st_l8(a.val, ptr); } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ { vec_st_h8(a.val, ptr); } -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st) 
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2) -OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2) +#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \ +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st) + +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16, uchar) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16, schar) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8, ushort) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8, short) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4, uint) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4, int) +OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float) + +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld, vsx_ld, vsx_st, vsx_st) +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2, uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2) +OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2, int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2) //////////////// Value reordering /////////////// @@ -343,7 +307,7 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ b1.val = fl(a.val); \ } \ inline _Tpwvec v_load_expand(const _Tp* ptr) \ -{ return _Tpwvec(fh(vsx_ld(0, ptr))); } +{ return _Tpwvec(fh(vec_ld_l8(ptr))); } OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu) OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh) @@ -353,10 +317,10 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpac OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh) inline v_uint32x4 v_load_expand_q(const uchar* ptr) -{ return v_uint32x4(vec_ld_buw(ptr)); } +{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); } inline v_int32x4 v_load_expand_q(const schar* ptr) -{ return v_int32x4(vec_ld_bsw(ptr)); } +{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], 
ptr[3])); } /* pack */ #define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \ @@ -429,36 +393,6 @@ inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d) d.val = vec_mergesql(a.val, b.val); } -/* Extract */ -template<int s, typename _Tpvec> -inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) -{ - const int w = sizeof(typename _Tpvec::lane_type); - const int n = _Tpvec::nlanes; - const unsigned int sf = ((w * n) - (s * w)); - if (s == 0) - return _Tpvec(a.val); - else if (sf > 15) - return _Tpvec(); - // bitwise it just to make xlc happy - return _Tpvec(vec_sld(b.val, a.val, sf & 15)); -} - -#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \ -template<int s> \ -inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ -{ \ - switch(s) { \ - case 0: return _Tpvec(a.val); \ - case 2: return _Tpvec(b.val); \ - case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \ - default: return _Tpvec(); \ - } \ -} -OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2) -OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2) - - ////////// Arithmetic, bitwise and comparison operations ///////// /* Element-wise binary and unary operations */ @@ -669,6 +603,11 @@ OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b) OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a) OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a) +/* Extract */ +template<int s, typename _Tpvec> +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) +{ return v_rotate_right<s>(a, b); } + ////////// Reduce and mask ///////// /** Reduce **/ diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index c377551364..21b50e8611 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -1,46 +1,6 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
-// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Copyright (C) 2015, Itseez Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html #ifndef OPENCV_HAL_VSX_UTILS_HPP #define OPENCV_HAL_VSX_UTILS_HPP @@ -64,106 +24,77 @@ typedef __vector unsigned char vec_uchar16; #define vec_uchar16_set(...) (vec_uchar16){__VA_ARGS__} #define vec_uchar16_sp(c) (__VSX_S16__(vec_uchar16, c)) #define vec_uchar16_c(v) ((vec_uchar16)(v)) -#define vec_uchar16_mx vec_uchar16_sp(0xFF) -#define vec_uchar16_mn vec_uchar16_sp(0) -#define vec_uchar16_z vec_uchar16_mn +#define vec_uchar16_z vec_uchar16_sp(0) typedef __vector signed char vec_char16; #define vec_char16_set(...) (vec_char16){__VA_ARGS__} #define vec_char16_sp(c) (__VSX_S16__(vec_char16, c)) #define vec_char16_c(v) ((vec_char16)(v)) -#define vec_char16_mx vec_char16_sp(0x7F) -#define vec_char16_mn vec_char16_sp(-0x7F-1) #define vec_char16_z vec_char16_sp(0) typedef __vector unsigned short vec_ushort8; #define vec_ushort8_set(...) (vec_ushort8){__VA_ARGS__} #define vec_ushort8_sp(c) (__VSX_S8__(vec_ushort8, c)) #define vec_ushort8_c(v) ((vec_ushort8)(v)) -#define vec_ushort8_mx vec_ushort8_sp(0xFFFF) -#define vec_ushort8_mn vec_ushort8_sp(0) -#define vec_ushort8_z vec_ushort8_mn +#define vec_ushort8_z vec_ushort8_sp(0) typedef __vector signed short vec_short8; #define vec_short8_set(...) 
(vec_short8){__VA_ARGS__} #define vec_short8_sp(c) (__VSX_S8__(vec_short8, c)) #define vec_short8_c(v) ((vec_short8)(v)) -#define vec_short8_mx vec_short8_sp(0x7FFF) -#define vec_short8_mn vec_short8_sp(-0x7FFF-1) #define vec_short8_z vec_short8_sp(0) typedef __vector unsigned int vec_uint4; #define vec_uint4_set(...) (vec_uint4){__VA_ARGS__} #define vec_uint4_sp(c) (__VSX_S4__(vec_uint4, c)) #define vec_uint4_c(v) ((vec_uint4)(v)) -#define vec_uint4_mx vec_uint4_sp(0xFFFFFFFFU) -#define vec_uint4_mn vec_uint4_sp(0) -#define vec_uint4_z vec_uint4_mn +#define vec_uint4_z vec_uint4_sp(0) typedef __vector signed int vec_int4; #define vec_int4_set(...) (vec_int4){__VA_ARGS__} #define vec_int4_sp(c) (__VSX_S4__(vec_int4, c)) #define vec_int4_c(v) ((vec_int4)(v)) -#define vec_int4_mx vec_int4_sp(0x7FFFFFFF) -#define vec_int4_mn vec_int4_sp(-0x7FFFFFFF-1) #define vec_int4_z vec_int4_sp(0) typedef __vector float vec_float4; #define vec_float4_set(...) (vec_float4){__VA_ARGS__} #define vec_float4_sp(c) (__VSX_S4__(vec_float4, c)) #define vec_float4_c(v) ((vec_float4)(v)) -#define vec_float4_mx vec_float4_sp(3.40282347E+38F) -#define vec_float4_mn vec_float4_sp(1.17549435E-38F) #define vec_float4_z vec_float4_sp(0) typedef __vector unsigned long long vec_udword2; #define vec_udword2_set(...) (vec_udword2){__VA_ARGS__} #define vec_udword2_sp(c) (__VSX_S2__(vec_udword2, c)) #define vec_udword2_c(v) ((vec_udword2)(v)) -#define vec_udword2_mx vec_udword2_sp(18446744073709551615ULL) -#define vec_udword2_mn vec_udword2_sp(0) -#define vec_udword2_z vec_udword2_mn +#define vec_udword2_z vec_udword2_sp(0) typedef __vector signed long long vec_dword2; #define vec_dword2_set(...) 
(vec_dword2){__VA_ARGS__} #define vec_dword2_sp(c) (__VSX_S2__(vec_dword2, c)) #define vec_dword2_c(v) ((vec_dword2)(v)) -#define vec_dword2_mx vec_dword2_sp(9223372036854775807LL) -#define vec_dword2_mn vec_dword2_sp(-9223372036854775807LL-1) #define vec_dword2_z vec_dword2_sp(0) typedef __vector double vec_double2; #define vec_double2_set(...) (vec_double2){__VA_ARGS__} #define vec_double2_c(v) ((vec_double2)(v)) #define vec_double2_sp(c) (__VSX_S2__(vec_double2, c)) -#define vec_double2_mx vec_double2_sp(1.7976931348623157E+308) -#define vec_double2_mn vec_double2_sp(2.2250738585072014E-308) #define vec_double2_z vec_double2_sp(0) #define vec_bchar16 __vector __bool char #define vec_bchar16_set(...) (vec_bchar16){__VA_ARGS__} #define vec_bchar16_c(v) ((vec_bchar16)(v)) -#define vec_bchar16_f (__VSX_S16__(vec_bchar16, 0)) -#define vec_bchar16_t (__VSX_S16__(vec_bchar16, 1)) #define vec_bshort8 __vector __bool short #define vec_bshort8_set(...) (vec_bshort8){__VA_ARGS__} #define vec_bshort8_c(v) ((vec_bshort8)(v)) -#define vec_bshort8_f (__VSX_S8__(vec_bshort8, 0)) -#define vec_bshort8_t (__VSX_S8__(vec_bshort8, 1)) #define vec_bint4 __vector __bool int #define vec_bint4_set(...) (vec_bint4){__VA_ARGS__} #define vec_bint4_c(v) ((vec_bint4)(v)) -#define vec_bint4_f (__VSX_S4__(vec_bint4, 0)) -#define vec_bint4_t (__VSX_S4__(vec_bint4, 1)) #define vec_bdword2 __vector __bool long long #define vec_bdword2_set(...) 
(vec_bdword2){__VA_ARGS__} #define vec_bdword2_c(v) ((vec_bdword2)(v)) -#define vec_bdword2_f (__VSX_S2__(vec_bdword2, 0)) -#define vec_bdword2_t (__VSX_S2__(vec_bdword2, 1)) - #define VSX_FINLINE(tp) extern inline tp __attribute__((always_inline)) @@ -688,34 +619,17 @@ VSX_IMPL_CONV_ODD_2_4(vec_uint4, vec_double2, vec_ctuo, vec_ctu) { vsx_stf(vec, VSX_OFFSET(o, p), (long long*)p); } #endif -// load 4 unsigned bytes into uint4 vector -#define vec_ld_buw(p) vec_uint4_set((p)[0], (p)[1], (p)[2], (p)[3]) - -// load 4 signed bytes into int4 vector -#define vec_ld_bsw(p) vec_int4_set((p)[0], (p)[1], (p)[2], (p)[3]) - -// load 4 unsigned bytes into float vector -#define vec_ld_bps(p) vec_ctf(vec_ld_buw(p), 0) - // Store lower 8 byte #define vec_st_l8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 0) // Store higher 8 byte #define vec_st_h8(v, p) *((uint64*)(p)) = vec_extract(vec_udword2_c(v), 1) -/* - * vec_ld_l8(ptr) -> Load 64-bits of integer data to lower part - * vec_ldz_l8(ptr) -> Load 64-bits of integer data to lower part and zero upper part -**/ -#define VSX_IMPL_LOAD_L8(Tvec, Tp) \ -VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \ -{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } \ -VSX_FINLINE(Tvec) vec_ldz_l8(const Tp *p) \ -{ \ - /* TODO: try (Tvec)(vec_udword2{*((uint64*)p), 0}) */ \ - static const vec_bdword2 mask = {0xFFFFFFFFFFFFFFFF, 0x0000000000000000}; \ - return vec_and(vec_ld_l8(p), (Tvec)mask); \ -} +// Load 64-bits of integer data to lower part +#define VSX_IMPL_LOAD_L8(Tvec, Tp) \ +VSX_FINLINE(Tvec) vec_ld_l8(const Tp *p) \ +{ return ((Tvec)vec_promote(*((uint64*)p), 0)); } + VSX_IMPL_LOAD_L8(vec_uchar16, uchar) VSX_IMPL_LOAD_L8(vec_char16, schar) VSX_IMPL_LOAD_L8(vec_ushort8, ushort) @@ -745,11 +659,11 @@ VSX_IMPL_LOAD_L8(vec_double2, double) * Implement vec_unpacklu and vec_unpackhu * since vec_unpackl, vec_unpackh only support signed integers **/ -#define VSX_IMPL_UNPACKU(rt, rg, zero) \ -VSX_FINLINE(rt) vec_unpacklu(const rg& a) \ -{ return 
reinterpret_cast<rt>(vec_mergel(a, zero)); } \ -VSX_FINLINE(rt) vec_unpackhu(const rg& a) \ -{ return reinterpret_cast<rt>(vec_mergeh(a, zero)); } +#define VSX_IMPL_UNPACKU(rt, rg, zero) \ +VSX_FINLINE(rt) vec_unpacklu(const rg& a) \ +{ return (rt)(vec_mergel(a, zero)); } \ +VSX_FINLINE(rt) vec_unpackhu(const rg& a) \ +{ return (rt)(vec_mergeh(a, zero)); } VSX_IMPL_UNPACKU(vec_ushort8, vec_uchar16, vec_uchar16_z) VSX_IMPL_UNPACKU(vec_uint4, vec_ushort8, vec_ushort8_z)