Merge pull request #10095 from alalek:fix_canny_intrinsics

pull/10159/merge
Alexander Alekhin 7 years ago
commit 11330b9253
  1. 73
      modules/imgproc/src/canny.cpp

@ -812,8 +812,6 @@ public:
~finalPass() {}
finalPass& operator=(const finalPass&) {return *this;}
void operator()(const Range &boundaries) const
{
// the final pass, form the final image
@ -821,77 +819,39 @@ public:
{
int j = 0;
uchar *pdst = dst.ptr<uchar>(i);
uchar *pmap;
const uchar *pmap = map.ptr<uchar>(i + 1);
#if CV_SIMD128
if(haveSIMD)
pmap = (uchar*)map.ptr<uchar>(i + 1) + CV_MALLOC_SIMD128;
pmap += CV_MALLOC_SIMD128;
else
#endif
pmap = (uchar*)map.ptr<uchar>(i + 1) + 1;
pmap += 1;
#if CV_SIMD128
if(haveSIMD) {
const v_int8x16 v_zero = v_setzero_s8();
for(; j <= dst.cols - 32; j += 32) {
v_uint8x16 v_pmap1 = v_load_aligned((const unsigned char*)(pmap + j));
v_uint8x16 v_pmap2 = v_load_aligned((const unsigned char*)(pmap + j + 16));
v_uint16x8 v_pmaplo1, v_pmaphi1, v_pmaplo2, v_pmaphi2;
v_expand(v_pmap1, v_pmaplo1, v_pmaphi1);
v_expand(v_pmap2, v_pmaplo2, v_pmaphi2);
v_pmaplo1 = v_pmaplo1 >> 1;
v_pmaphi1 = v_pmaphi1 >> 1;
v_pmaplo2 = v_pmaplo2 >> 1;
v_pmaphi2 = v_pmaphi2 >> 1;
v_pmap1 = v_pack(v_pmaplo1, v_pmaphi1);
v_pmap2 = v_pack(v_pmaplo2, v_pmaphi2);
v_pmap1 = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap1));
v_pmap2 = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap2));
const v_uint8x16 v_zero = v_setzero_u8();
const v_uint8x16 v_ff = ~v_zero;
const v_uint8x16 v_two(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
v_store((pdst + j), v_pmap1);
v_store((pdst + j + 16), v_pmap2);
}
if(j <= dst.cols - 16) {
for (; j <= dst.cols - 16; j += 16)
{
v_uint8x16 v_pmap = v_load_aligned((const unsigned char*)(pmap + j));
v_uint16x8 v_pmaplo;
v_uint16x8 v_pmaphi;
v_expand(v_pmap, v_pmaplo, v_pmaphi);
v_pmaplo = v_pmaplo >> 1;
v_pmaphi = v_pmaphi >> 1;
v_pmap = v_pack(v_pmaplo, v_pmaphi);
v_pmap = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap));
v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
v_store((pdst + j), v_pmap);
j += 16;
}
if(j <= dst.cols - 8) {
v_uint8x16 v_pmap = v_load_halves((const unsigned char*)(pmap + j), (const unsigned char*)(pmap + j));
v_uint16x8 v_pmaplo;
v_uint16x8 v_pmaphi;
v_expand(v_pmap, v_pmaplo, v_pmaphi);
v_pmaplo = v_pmaplo >> 1;
v_pmaphi = v_pmaphi >> 1;
v_pmap = v_pack(v_pmaplo, v_pmaphi);
v_pmap = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap));
if (j <= dst.cols - 8)
{
v_uint8x16 v_pmap = v_load_low((const unsigned char*)(pmap + j));
v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
v_store_low((pdst + j), v_pmap);
j += 8;
}
}
#endif
for (; j < dst.cols; j++)
{
pdst[j] = (uchar)-(pmap[j] >> 1);
}
}
}
@ -901,6 +861,9 @@ private:
#if CV_SIMD128
bool haveSIMD;
#endif
finalPass(const finalPass&); // = delete
finalPass& operator=(const finalPass&); // = delete
};
#ifdef HAVE_OPENVX

Loading…
Cancel
Save