risc-v: fix unaligned loads and stores

pull/23973/head
Maksim Shabunin 1 year ago
parent 99058ee30b
commit 3f0707234f
  1. modules/core/src/matrix_transform.cpp (8 lines changed)
  2. modules/imgproc/src/demosaicing.cpp (24 lines changed)

@@ -603,10 +603,10 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size,
 {
 for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
 {
-v_int32 t0 = vx_load((int*)(src0 + i));
-v_int32 t1 = vx_load((int*)(src1 + i));
-v_store((int*)(dst0 + i), t1);
-v_store((int*)(dst1 + i), t0);
+v_int32 t0 = v_reinterpret_as_s32(vx_load(src0 + i));
+v_int32 t1 = v_reinterpret_as_s32(vx_load(src1 + i));
+v_store(dst0 + i, v_reinterpret_as_u8(t1));
+v_store(dst1 + i, v_reinterpret_as_u8(t0));
 }
 }
 #if CV_STRONG_ALIGNMENT

@@ -184,9 +184,9 @@ public:
 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
 {
-v_uint16x8 r0 = v_load((ushort*)bayer);
-v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
-v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
+v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
+v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
 v_uint16x8 b1 = ((r0 << 8) >> 7) + ((r2 << 8) >> 7);
 v_uint16x8 b0 = v_rotate_right<1>(b1) + b1;
@@ -265,9 +265,9 @@ public:
 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
 {
-v_uint16x8 r0 = v_load((ushort*)bayer);
-v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
-v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
+v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
+v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
 v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
 v_uint16x8 nextb1 = v_rotate_right<1>(b1);
@@ -398,9 +398,9 @@ public:
 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
 {
-v_uint16x8 r0 = v_load((ushort*)bayer);
-v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
-v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
+v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
+v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
 v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
 v_uint16x8 nextb1 = v_rotate_right<1>(b1);
@@ -494,9 +494,9 @@ public:
 B G B G | B G B G | B G B G | B G B G
 */
-v_uint16x8 r0 = v_load((ushort*)bayer);
-v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
-v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
+v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
+v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
+v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
 v_uint16x8 b1 = (r0 & masklow) + (r2 & masklow);
 v_uint16x8 nextb1 = v_rotate_right<1>(b1);

Loading…
Cancel
Save