@@ -298,7 +298,7 @@ void warpAffineLinearInvoker_8UC1(const uint8_t *src_data, size_t src_step, int
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -418,7 +418,7 @@ void warpAffineLinearInvoker_8UC3(const uint8_t *src_data, size_t src_step, int
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -547,7 +547,7 @@ void warpAffineLinearInvoker_8UC4(const uint8_t *src_data, size_t src_step, int
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
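Review note: a bilinear tap with integer base (ix, iy) reads the 2x2 block at
columns ix..ix+1 and rows iy..iy+1, so it is fully inside the source when
0 <= ix <= srccols-2 and 0 <= iy <= srcrows-2. The scalar sketch below is one
reading of what the four constants encode; it assumes the masks are built with
strict unsigned less-than compares, which these hunks do not show.

    // Fully inside: one unsigned compare per axis. Casting a negative
    // coordinate to unsigned wraps it to a huge value, so "< W - 1" accepts
    // exactly 0..W-2. Under this reading the row constant must be
    // srcrows - 1 to mirror srccols - 1; the old srcrows - 2 needlessly
    // rejected row srcrows - 2 and sent it down the slower border path.
    bool fully_inside(int ix, int iy, int W, int H) {
        return (unsigned)ix < (unsigned)(W - 1) &&
               (unsigned)iy < (unsigned)(H - 1);
    }

    // Overlapping at all: the 2x2 block touches the image iff ix is in
    // [-1, W-1] and iy is in [-1, H-1]; shifting by +1 turns each range into
    // a single unsigned compare against the outer_* constants (W+1, H+1).
    bool partially_inside(int ix, int iy, int W, int H) {
        return (unsigned)(ix + 1) < (unsigned)(W + 1) &&
               (unsigned)(iy + 1) < (unsigned)(H + 1);
    }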
@@ -590,11 +590,11 @@ void warpAffineLinearInvoker_8UC4(const uint8_t *src_data, size_t src_step, int
                 vx_store(vbeta, src_y0);
                 vx_store(vbeta+vlanes_32, src_y1);
 #if CV_SIMD256
-                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(8U);
 #elif CV_SIMD128
-                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(8U);
 #elif CV_SIMD_SCALABLE
-                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(8U);
 #endif
             } else {
                 uint8_t pixbuf[max_uf*4*4];
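Review note: the *_LOAD_SHUFFLE_INTER_8UC4() -> *_LOAD_SHUFFLE_INTER_C4(8U)
renames in the hunk above turn the element depth into a macro argument, so the
same four-channel shuffle+interpolate path can be stamped out for 8U, 16U and
32F (the 16U and 32F call sites follow below). A minimal, self-contained
illustration of the token-pasting pattern; the worker names here are invented,
only the dispatch idea carries over:

    #include <cstdio>

    // Invented stand-ins for per-depth workers.
    #define INTER_C4_IMPL_8U()  std::puts("8-bit  C4 shuffle+interpolate")
    #define INTER_C4_IMPL_16U() std::puts("16-bit C4 shuffle+interpolate")
    #define INTER_C4_IMPL_32F() std::puts("float  C4 shuffle+interpolate")

    // One parameterized entry point instead of one macro per pixel type:
    // DEPTH is pasted onto the implementation name at preprocessing time.
    #define LOAD_SHUFFLE_INTER_C4(DEPTH) INTER_C4_IMPL_##DEPTH()

    int main() {
        LOAD_SHUFFLE_INTER_C4(8U);   // expands to INTER_C4_IMPL_8U()
        LOAD_SHUFFLE_INTER_C4(16U);
        LOAD_SHUFFLE_INTER_C4(32F);
        return 0;
    }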
@@ -660,7 +660,7 @@ void warpAffineLinearInvoker_16UC1(const uint16_t *src_data, size_t src_step, in
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -761,7 +761,7 @@ void warpAffineLinearInvoker_16UC3(const uint16_t *src_data, size_t src_step, in
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -866,7 +866,7 @@ void warpAffineLinearInvoker_16UC4(const uint16_t *src_data, size_t src_step, in
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -876,7 +876,6 @@ void warpAffineLinearInvoker_16UC4(const uint16_t *src_data, size_t src_step, in
         int32_t addr[max_uf],
                 src_ix[max_uf],
                 src_iy[max_uf];
-        uint16_t pixbuf[max_uf*4*4];
 
         uint16_t bvalbuf[max_uf*4];
         for (int i = 0; i < uf; i++) {
@@ -904,18 +903,26 @@ void warpAffineLinearInvoker_16UC4(const uint16_t *src_data, size_t src_step, in
             CV_WARPAFFINE_LINEAR_VECTOR_COMPUTE_MAPPED_COORD2(C4);
 
             if (v_reduce_min(inner_mask) != 0) { // all loaded pixels are completely inside the image
-                CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(C4, 16U);
+                float valpha[max_uf], vbeta[max_uf];
+                vx_store(valpha, src_x0);
+                vx_store(valpha+vlanes_32, src_x1);
+                vx_store(vbeta, src_y0);
+                vx_store(vbeta+vlanes_32, src_y1);
+#if CV_SIMD256
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(16U);
+#elif CV_SIMD128
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(16U);
+#elif CV_SIMD_SCALABLE
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(16U);
+#endif
             } else {
+                uint16_t pixbuf[max_uf*4*4];
                 CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(C4, 16U);
+                CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(C4);
             }
-
-            CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(C4);
         }
 #endif // (CV_SIMD || CV_SIMD_SCALABLE)
 
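Review note: the restructured 16UC4 body above gives the all-inside case a
fused shuffle+interpolate path per SIMD width, fed by the alpha/beta weights
spilled to valpha/vbeta, and moves the pixbuf staging buffer into the border
branch, the only place it is still used. Both branches compute plain bilinear
interpolation per channel; a scalar reference, assuming valpha/vbeta hold the
fractional x/y offsets of the mapped coordinate:

    #include <cstdint>

    // One 4-channel bilinear sample. p00/p01 are the two taps on row iy,
    // p10/p11 the taps on row iy+1; each points at 4 interleaved channels.
    static void bilinear_c4_u16(const uint16_t *p00, const uint16_t *p01,
                                const uint16_t *p10, const uint16_t *p11,
                                float alpha, float beta, uint16_t *dst)
    {
        for (int c = 0; c < 4; c++) {
            float top = p00[c] + alpha * (p01[c] - p00[c]); // lerp in x, row iy
            float bot = p10[c] + alpha * (p11[c] - p10[c]); // lerp in x, row iy+1
            float v   = top + beta * (bot - top);           // lerp in y
            int   iv  = (int)(v + 0.5f);                    // round to nearest
            dst[c] = (uint16_t)(iv < 0 ? 0 : (iv > 65535 ? 65535 : iv));
        }
    }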
@@ -972,7 +979,7 @@ void warpAffineLinearInvoker_32FC1(const float *src_data, size_t src_step, int s
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1071,7 +1078,7 @@ void warpAffineLinearInvoker_32FC3(const float *src_data, size_t src_step, int s
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1176,7 +1183,7 @@ void warpAffineLinearInvoker_32FC4(const float *src_data, size_t src_step, int s
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1186,7 +1193,6 @@ void warpAffineLinearInvoker_32FC4(const float *src_data, size_t src_step, int s
         int32_t addr[max_uf],
                 src_ix[max_uf],
                 src_iy[max_uf];
-        float pixbuf[max_uf*4*4];
 
         float bvalbuf[max_uf*4];
         for (int i = 0; i < uf; i++) {
@@ -1218,16 +1224,25 @@ void warpAffineLinearInvoker_32FC4(const float *src_data, size_t src_step, int s
             CV_WARPAFFINE_LINEAR_VECTOR_COMPUTE_MAPPED_COORD2(C4);
 
             if (v_reduce_min(inner_mask) != 0) { // all loaded pixels are completely inside the image
-                CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(C4, 32F);
+                float valpha[max_uf], vbeta[max_uf];
+                vx_store(valpha, src_x0);
+                vx_store(valpha+vlanes_32, src_x1);
+                vx_store(vbeta, src_y0);
+                vx_store(vbeta+vlanes_32, src_y1);
+#if CV_SIMD256
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(32F);
+#elif CV_SIMD128
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(32F);
+#elif CV_SIMD_SCALABLE
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(32F);
+#endif
             } else {
+                float pixbuf[max_uf*4*4];
                 CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(C4, 32F);
+                CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(C4);
             }
-
-            CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(C4);
         }
 #endif // (CV_SIMD || CV_SIMD_SCALABLE)
 
@@ -1284,7 +1299,7 @@ void warpAffineLinearApproxInvoker_8UC1(const uint8_t *src_data, size_t src_step
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1391,7 +1406,7 @@ void warpAffineLinearApproxInvoker_8UC3(const uint8_t *src_data, size_t src_step
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1505,7 +1520,7 @@ void warpAffineLinearApproxInvoker_8UC4(const uint8_t *src_data, size_t src_step
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1622,7 +1637,7 @@ void warpPerspectiveLinearInvoker_8UC1(const uint8_t *src_data, size_t src_step,
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1744,7 +1759,7 @@ void warpPerspectiveLinearInvoker_8UC3(const uint8_t *src_data, size_t src_step,
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1874,7 +1889,7 @@ void warpPerspectiveLinearInvoker_8UC4(const uint8_t *src_data, size_t src_step,
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -1917,11 +1932,11 @@ void warpPerspectiveLinearInvoker_8UC4(const uint8_t *src_data, size_t src_step,
                 vx_store(vbeta, src_y0);
                 vx_store(vbeta+vlanes_32, src_y1);
 #if CV_SIMD256
-                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(8U);
 #elif CV_SIMD128
-                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(8U);
 #elif CV_SIMD_SCALABLE
-                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(8U);
 #endif
             } else {
                 uint8_t pixbuf[max_uf*4*4];
@@ -1988,7 +2003,7 @@ void warpPerspectiveLinearInvoker_16UC1(const uint16_t *src_data, size_t src_ste
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2089,7 +2104,7 @@ void warpPerspectiveLinearInvoker_16UC3(const uint16_t *src_data, size_t src_ste
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2194,7 +2209,7 @@ void warpPerspectiveLinearInvoker_16UC4(const uint16_t *src_data, size_t src_ste
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2204,7 +2219,6 @@ void warpPerspectiveLinearInvoker_16UC4(const uint16_t *src_data, size_t src_ste
         int32_t addr[max_uf],
                 src_ix[max_uf],
                 src_iy[max_uf];
-        uint16_t pixbuf[max_uf*4*4];
 
         uint16_t bvalbuf[max_uf*4];
         for (int i = 0; i < uf; i++) {
@@ -2232,18 +2246,26 @@ void warpPerspectiveLinearInvoker_16UC4(const uint16_t *src_data, size_t src_ste
             CV_WARPPERSPECTIVE_LINEAR_VECTOR_COMPUTE_MAPPED_COORD2(C4);
 
             if (v_reduce_min(inner_mask) != 0) { // all loaded pixels are completely inside the image
-                CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(C4, 16U);
+                float valpha[max_uf], vbeta[max_uf];
+                vx_store(valpha, src_x0);
+                vx_store(valpha+vlanes_32, src_x1);
+                vx_store(vbeta, src_y0);
+                vx_store(vbeta+vlanes_32, src_y1);
+#if CV_SIMD256
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(16U);
+#elif CV_SIMD128
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(16U);
+#elif CV_SIMD_SCALABLE
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(16U);
+#endif
             } else {
+                uint16_t pixbuf[max_uf*4*4];
                 CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(C4, 16U);
+                CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(C4);
             }
-
-            CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(C4);
         }
 #endif // (CV_SIMD || CV_SIMD_SCALABLE)
 
@@ -2301,7 +2323,7 @@ void warpPerspectiveLinearInvoker_32FC1(const float *src_data, size_t src_step,
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2401,7 +2423,7 @@ void warpPerspectiveLinearInvoker_32FC3(const float *src_data, size_t src_step,
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2507,7 +2529,7 @@ void warpPerspectiveLinearInvoker_32FC4(const float *src_data, size_t src_step,
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2517,7 +2539,6 @@ void warpPerspectiveLinearInvoker_32FC4(const float *src_data, size_t src_step,
         int32_t addr[max_uf],
                 src_ix[max_uf],
                 src_iy[max_uf];
-        float pixbuf[max_uf*4*4];
 
         float bvalbuf[max_uf*4];
         for (int i = 0; i < uf; i++) {
@@ -2549,16 +2570,25 @@ void warpPerspectiveLinearInvoker_32FC4(const float *src_data, size_t src_step,
             CV_WARPPERSPECTIVE_LINEAR_VECTOR_COMPUTE_MAPPED_COORD2(C4);
 
             if (v_reduce_min(inner_mask) != 0) { // all loaded pixels are completely inside the image
-                CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(C4, 32F);
+                float valpha[max_uf], vbeta[max_uf];
+                vx_store(valpha, src_x0);
+                vx_store(valpha+vlanes_32, src_x1);
+                vx_store(vbeta, src_y0);
+                vx_store(vbeta+vlanes_32, src_y1);
+#if CV_SIMD256
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(32F);
+#elif CV_SIMD128
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(32F);
+#elif CV_SIMD_SCALABLE
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(32F);
+#endif
             } else {
+                float pixbuf[max_uf*4*4];
                 CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(C4, 32F);
+                CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(C4);
             }
-
-            CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(C4);
         }
 #endif // (CV_SIMD || CV_SIMD_SCALABLE)
 
@@ -2616,7 +2646,7 @@ void warpPerspectiveLinearApproxInvoker_8UC1(const uint8_t *src_data, size_t src
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2724,7 +2754,7 @@ void warpPerspectiveLinearApproxInvoker_8UC3(const uint8_t *src_data, size_t src
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2838,7 +2868,7 @@ void warpPerspectiveLinearApproxInvoker_8UC4(const uint8_t *src_data, size_t src
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -2961,7 +2991,7 @@ void remapLinearInvoker_8UC1(const uint8_t *src_data, size_t src_step, int src_r
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3100,7 +3130,7 @@ void remapLinearInvoker_8UC3(const uint8_t *src_data, size_t src_step, int src_r
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3247,7 +3277,7 @@ void remapLinearInvoker_8UC4(const uint8_t *src_data, size_t src_step, int src_r
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3292,11 +3322,11 @@ void remapLinearInvoker_8UC4(const uint8_t *src_data, size_t src_step, int src_r
                 vx_store(vbeta, src_y0);
                 vx_store(vbeta+vlanes_32, src_y1);
 #if CV_SIMD256
-                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(8U);
 #elif CV_SIMD128
-                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(8U);
 #elif CV_SIMD_SCALABLE
-                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_8UC4();
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(8U);
 #endif
             } else {
                 uint8_t pixbuf[max_uf*4*4];
@@ -3378,7 +3408,7 @@ void remapLinearInvoker_16UC1(const uint16_t *src_data, size_t src_step, int src
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3496,7 +3526,7 @@ void remapLinearInvoker_16UC3(const uint16_t *src_data, size_t src_step, int src
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3618,7 +3648,7 @@ void remapLinearInvoker_16UC4(const uint16_t *src_data, size_t src_step, int src
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3628,7 +3658,6 @@ void remapLinearInvoker_16UC4(const uint16_t *src_data, size_t src_step, int src
         int32_t addr[max_uf],
                 src_ix[max_uf],
                 src_iy[max_uf];
-        uint16_t pixbuf[max_uf*4*4];
 
         uint16_t bvalbuf[max_uf*4];
         for (int i = 0; i < uf; i++) {
@@ -3658,18 +3687,26 @@ void remapLinearInvoker_16UC4(const uint16_t *src_data, size_t src_step, int src
             CV_REMAP_LINEAR_VECTOR_COMPUTE_MAPPED_COORD2(C4);
 
             if (v_reduce_min(inner_mask) != 0) { // all loaded pixels are completely inside the image
-                CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(C4, 16U);
+                float valpha[max_uf], vbeta[max_uf];
+                vx_store(valpha, src_x0);
+                vx_store(valpha+vlanes_32, src_x1);
+                vx_store(vbeta, src_y0);
+                vx_store(vbeta+vlanes_32, src_y1);
+#if CV_SIMD256
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(16U);
+#elif CV_SIMD128
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(16U);
+#elif CV_SIMD_SCALABLE
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(16U);
+#endif
             } else {
+                uint16_t pixbuf[max_uf*4*4];
                 CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(C4, 16U);
+                CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(C4);
             }
-
-            CV_WARP_LINEAR_VECTOR_INTER_LOAD_U16(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CONVERT_U16F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_STORE_F32U16(C4);
         }
 #endif // (CV_SIMD || CV_SIMD_SCALABLE)
 
@@ -3742,7 +3779,7 @@ void remapLinearInvoker_32FC1(const float *src_data, size_t src_step, int src_ro
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3859,7 +3896,7 @@ void remapLinearInvoker_32FC3(const float *src_data, size_t src_step, int src_ro
         std::array<float, max_vlanes_32> start_indices;
        std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3982,7 +4019,7 @@ void remapLinearInvoker_32FC4(const float *src_data, size_t src_step, int src_ro
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -3992,7 +4029,6 @@ void remapLinearInvoker_32FC4(const float *src_data, size_t src_step, int src_ro
         int32_t addr[max_uf],
                 src_ix[max_uf],
                 src_iy[max_uf];
-        float pixbuf[max_uf*4*4];
 
         float bvalbuf[max_uf*4];
         for (int i = 0; i < uf; i++) {
@@ -4026,16 +4062,25 @@ void remapLinearInvoker_32FC4(const float *src_data, size_t src_step, int src_ro
             CV_REMAP_LINEAR_VECTOR_COMPUTE_MAPPED_COORD2(C4);
 
             if (v_reduce_min(inner_mask) != 0) { // all loaded pixels are completely inside the image
-                CV_WARP_LINEAR_VECTOR_SHUFFLE_ALLWITHIN(C4, 32F);
+                float valpha[max_uf], vbeta[max_uf];
+                vx_store(valpha, src_x0);
+                vx_store(valpha+vlanes_32, src_x1);
+                vx_store(vbeta, src_y0);
+                vx_store(vbeta+vlanes_32, src_y1);
+#if CV_SIMD256
+                CV_WARP_SIMD256_LOAD_SHUFFLE_INTER_C4(32F);
+#elif CV_SIMD128
+                CV_WARP_SIMD128_LOAD_SHUFFLE_INTER_C4(32F);
+#elif CV_SIMD_SCALABLE
+                CV_WARP_SIMDX_LOAD_SHUFFLE_INTER_C4(32F);
+#endif
             } else {
+                float pixbuf[max_uf*4*4];
                 CV_WARP_LINEAR_VECTOR_SHUFFLE_NOTALLWITHIN(C4, 32F);
+                CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
+                CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(C4);
             }
-
-            CV_WARP_LINEAR_VECTOR_INTER_LOAD_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_CALC_F32(C4);
-
-            CV_WARP_LINEAR_VECTOR_INTER_STORE_F32F32(C4);
         }
 #endif // (CV_SIMD || CV_SIMD_SCALABLE)
 
@@ -4107,7 +4152,7 @@ void remapLinearApproxInvoker_8UC1(const uint8_t *src_data, size_t src_step, int
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -4229,7 +4274,7 @@ void remapLinearApproxInvoker_8UC3(const uint8_t *src_data, size_t src_step, int
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
@@ -4359,7 +4404,7 @@ void remapLinearApproxInvoker_8UC4(const uint8_t *src_data, size_t src_step, int
         std::array<float, max_vlanes_32> start_indices;
         std::iota(start_indices.data(), start_indices.data() + max_vlanes_32, 0.f);
 
-        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 2),
+        v_uint32 inner_srows = vx_setall_u32((unsigned)srcrows - 1),
                  inner_scols = vx_setall_u32((unsigned)srccols - 1),
                  outer_srows = vx_setall_u32((unsigned)srcrows + 1),
                  outer_scols = vx_setall_u32((unsigned)srccols + 1);
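Review note: all of the invokers touched above sit behind the public warp and
remap entry points, so both new paths can be exercised end to end with a warp
whose output partly maps outside the source. A minimal harness (standard
OpenCV API; nothing here is specific to this patch):

    #include <opencv2/imgproc.hpp>

    int main()
    {
        // A 4-channel 16U input routes through the 16UC4 invokers above.
        cv::Mat src(480, 640, CV_16UC4, cv::Scalar(1000, 2000, 3000, 4000));

        // Mild affine map with translation: interior pixels stay fully inside
        // the source (fast path); pixels near the edges map partially outside
        // (pixbuf border path).
        cv::Mat M = (cv::Mat_<double>(2, 3) << 0.95, 0.05, -8.0,
                                              -0.05, 0.95,  6.0);

        cv::Mat dst;
        cv::warpAffine(src, dst, M, src.size(), cv::INTER_LINEAR,
                       cv::BORDER_CONSTANT, cv::Scalar());
        return 0;
    }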