Merge pull request #16463 from alalek:core_strong_ptr_alignment

pull/16539/head
Alexander Alekhin 5 years ago
commit eb14f9a464
  1. 7
      modules/core/include/opencv2/core/cvdef.h
  2. 52
      modules/core/include/opencv2/core/hal/intrin_cpp.hpp
  3. 26
      modules/core/src/copy.cpp
  4. 2
      modules/dnn/src/onnx/onnx_importer.cpp

@ -340,6 +340,13 @@ enum CpuFeatures {
#include "cv_cpu_dispatch.h"
#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
// int*, int64* should be properly aligned pointers on ARMv7
#define CV_STRONG_ALIGNMENT 1
#endif
#if !defined(CV_STRONG_ALIGNMENT)
#define CV_STRONG_ALIGNMENT 0
#endif
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795

@ -1458,16 +1458,23 @@ template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const
@return register object
@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
@note Alignment requirement:
if CV_STRONG_ALIGNMENT=1 then the passed pointer must be aligned (alignment to `sizeof(lane type)` should be enough).
Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`).
*/
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr)
{
    // Register type selected for the lane type _Tp (same as the return type).
    typedef v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> reg_type;

#if CV_STRONG_ALIGNMENT
    // In strong-alignment builds the source pointer must be aligned
    // to the size of a single lane.
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif

    // Construct the register directly from the memory pointed to by ptr.
    return reg_type(ptr);
}
/** @brief Load register contents from memory (aligned)
similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)
similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc)
*/
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr)
@ -1488,6 +1495,9 @@ v_int32x4 r = v_load_low(lo);
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
for( int i = 0; i < c.nlanes/2; i++ )
{
@ -1509,6 +1519,10 @@ v_int32x4 r = v_load_halves(lo, hi);
template<typename _Tp>
inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(loptr));
CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
#endif
v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
for( int i = 0; i < c.nlanes/2; i++ )
{
@ -1531,6 +1545,9 @@ template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2>
v_load_expand(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
typedef typename V_TypeTraits<_Tp>::w_type w_type;
v_reg<w_type, V_TypeTraits<w_type>::nlanes128> c;
for( int i = 0; i < c.nlanes; i++ )
@ -1552,6 +1569,9 @@ template<typename _Tp>
inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_TypeTraits<_Tp>::nlanes128 / 4>
v_load_expand_q(const _Tp* ptr)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
typedef typename V_TypeTraits<_Tp>::q_type q_type;
v_reg<q_type, V_TypeTraits<q_type>::nlanes128> c;
for( int i = 0; i < c.nlanes; i++ )
@ -1572,6 +1592,9 @@ For all types except 64-bit. */
template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
v_reg<_Tp, n>& b)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i2;
for( i = i2 = 0; i < n; i++, i2 += 2 )
{
@ -1591,6 +1614,9 @@ For all types except 64-bit. */
template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i3;
for( i = i3 = 0; i < n; i++, i3 += 3 )
{
@ -1613,6 +1639,9 @@ inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
v_reg<_Tp, n>& d)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i4;
for( i = i4 = 0; i < n; i++, i4 += 4 )
{
@ -1636,6 +1665,9 @@ inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i2;
for( i = i2 = 0; i < n; i++, i2 += 2 )
{
@ -1657,6 +1689,9 @@ inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i3;
for( i = i3 = 0; i < n; i++, i3 += 3 )
{
@ -1679,6 +1714,9 @@ template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_
const v_reg<_Tp, n>& d,
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
{
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
int i, i4;
for( i = i4 = 0; i < n; i++, i4 += 4 )
{
@ -1700,6 +1738,9 @@ Pointer can be unaligned. */
template<typename _Tp, int n>
inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    // In strong-alignment builds the destination must be aligned
    // to the size of a single lane.
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif

    // Write all n lanes of the register to ptr[0..n-1], in lane order.
    int lane = 0;
    while( lane < n )
    {
        ptr[lane] = a.s[lane];
        ++lane;
    }
}
@ -1707,6 +1748,9 @@ inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
template<typename _Tp, int n>
inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
{
// The store-mode argument is accepted but not used by this implementation
// (its parameter name is commented out).
#if CV_STRONG_ALIGNMENT
// In strong-alignment builds the destination must be aligned to the lane size.
CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif
// Forward to the two-argument v_store(ptr, a) overload.
v_store(ptr, a);
}
@ -1720,6 +1764,9 @@ Scheme:
template<typename _Tp, int n>
inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    // In strong-alignment builds the destination must be aligned
    // to the size of a single lane.
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif

    // Only the first n/2 lanes are written, to ptr[0..n/2-1].
    const int half = n/2;
    int lane = 0;
    while( lane < half )
    {
        ptr[lane] = a.s[lane];
        ++lane;
    }
}
@ -1734,6 +1781,9 @@ Scheme:
template<typename _Tp, int n>
inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
{
#if CV_STRONG_ALIGNMENT
    // In strong-alignment builds the destination must be aligned
    // to the size of a single lane.
    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
#endif

    // Lanes n/2 .. 2*(n/2)-1 of the register are written to ptr[0..n/2-1].
    const int half = n/2;
    for( int lane = half; lane < 2*half; ++lane )
        ptr[lane - half] = a.s[lane];
}

@ -563,12 +563,6 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask)
return *this;
}
#if CV_NEON && !defined(__aarch64__)
#define CV_CHECK_ALIGNMENT 1
#else
#define CV_CHECK_ALIGNMENT 0
#endif
#if CV_SIMD128
template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{
@ -578,7 +572,7 @@ template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s
int width_1 = width & -v_uint8x16::nlanes;
int i, j;
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(T)>(src, dst));
#endif
@ -630,7 +624,7 @@ template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const
int end = (int)(size.width*esz);
int width = (end + 1)/2;
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
CV_Assert(isAligned<sizeof(T1)>(src, dst));
CV_Assert(isAligned<sizeof(T2)>(src, dst));
#endif
@ -659,7 +653,7 @@ static void
flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{
#if CV_SIMD
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep;
#endif
if (esz == 2 * v_uint8x16::nlanes)
@ -712,7 +706,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
}
}
else if (esz == 8
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
&& isAligned<sizeof(uint64)>(alignmentMark)
#endif
)
@ -720,7 +714,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz);
}
else if (esz == 4
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
&& isAligned<sizeof(unsigned)>(alignmentMark)
#endif
)
@ -728,7 +722,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz);
}
else if (esz == 2
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
&& isAligned<sizeof(ushort)>(alignmentMark)
#endif
)
@ -740,7 +734,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz);
}
else if (esz == 24
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
&& isAligned<sizeof(uint64_t)>(alignmentMark)
#endif
)
@ -766,7 +760,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size,
}
}
}
#if !CV_CHECK_ALIGNMENT
#if !CV_STRONG_ALIGNMENT
else if (esz == 12)
{
flipHoriz_double<uint64_t,uint>(src, sstep, dst, dstep, size, esz);
@ -815,7 +809,7 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size,
{
int i = 0;
#if CV_SIMD
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
#endif
{
@ -827,7 +821,7 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size,
vx_store((int*)(dst1 + i), t0);
}
}
#if CV_CHECK_ALIGNMENT
#if CV_STRONG_ALIGNMENT
else
{
for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)

@ -148,6 +148,7 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
else
{
const char* val = tensor_proto.raw_data().c_str();
#if CV_STRONG_ALIGNMENT
// Aligned pointer is required: https://github.com/opencv/opencv/issues/16373
// this doesn't work: typedef int64_t CV_DECL_ALIGNED(1) unaligned_int64_t;
AutoBuffer<int64_t, 16> aligned_val;
@ -158,6 +159,7 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
memcpy(aligned_val.data(), val, sz);
val = (const char*)aligned_val.data();
}
#endif
const int64_t* src = reinterpret_cast<const int64_t*>(val);
convertInt64ToInt32(src, dst, blob.total());
}

Loading…
Cancel
Save