mirror of https://github.com/opencv/opencv.git
Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
773 lines
26 KiB
773 lines
26 KiB
/* |
|
* By downloading, copying, installing or using the software you agree to this license. |
|
* If you do not agree to this license, do not download, install, |
|
* copy or use the software. |
|
* |
|
* |
|
* License Agreement |
|
* For Open Source Computer Vision Library |
|
* (3-clause BSD License) |
|
* |
|
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved. |
|
* Third party copyrights are property of their respective owners. |
|
* |
|
* Redistribution and use in source and binary forms, with or without modification, |
|
* are permitted provided that the following conditions are met: |
|
* |
|
* * Redistributions of source code must retain the above copyright notice, |
|
* this list of conditions and the following disclaimer. |
|
* |
|
* * Redistributions in binary form must reproduce the above copyright notice, |
|
* this list of conditions and the following disclaimer in the documentation |
|
* and/or other materials provided with the distribution. |
|
* |
|
* * Neither the names of the copyright holders nor the names of the contributors |
|
* may be used to endorse or promote products derived from this software |
|
* without specific prior written permission. |
|
* |
|
* This software is provided by the copyright holders and contributors "as is" and |
|
* any express or implied warranties, including, but not limited to, the implied |
|
* warranties of merchantability and fitness for a particular purpose are disclaimed. |
|
* In no event shall copyright holders or contributors be liable for any direct, |
|
* indirect, incidental, special, exemplary, or consequential damages |
|
* (including, but not limited to, procurement of substitute goods or services; |
|
* loss of use, data, or profits; or business interruption) however caused |
|
* and on any theory of liability, whether in contract, strict liability, |
|
* or tort (including negligence or otherwise) arising in any way out of |
|
* the use of this software, even if advised of the possibility of such damage. |
|
*/ |
|
|
|
#include "common.hpp" |
|
|
|
#include "saturate_cast.hpp" |
|
#include <vector> |
|
#include <cstring> |
|
|
|
namespace CAROTENE_NS { |
|
|
|
#ifdef CAROTENE_NEON |
|
namespace { |
|
struct RowFilter3x3Canny |
|
{ |
|
inline RowFilter3x3Canny(const ptrdiff_t borderxl, const ptrdiff_t borderxr) |
|
{ |
|
vfmask = vreinterpret_u8_u64(vmov_n_u64(borderxl ? 0x0000FFffFFffFFffULL : 0x0100FFffFFffFFffULL)); |
|
vtmask = vreinterpret_u8_u64(vmov_n_u64(borderxr ? 0x0707060504030201ULL : 0x0706050403020100ULL)); |
|
lookLeft = offsetk - borderxl; |
|
lookRight = offsetk - borderxr; |
|
} |
|
|
|
inline void operator()(const u8* src, s16* dstx, s16* dsty, ptrdiff_t width) |
|
{ |
|
uint8x8_t l = vtbl1_u8(vld1_u8(src - lookLeft), vfmask); |
|
ptrdiff_t i = 0; |
|
for (; i < width - 8 + lookRight; i += 8) |
|
{ |
|
internal::prefetch(src + i); |
|
uint8x8_t l18u = vld1_u8(src + i + 1); |
|
|
|
uint8x8_t l2 = l18u; |
|
uint8x8_t l0 = vext_u8(l, l18u, 6); |
|
int16x8_t l1x2 = vreinterpretq_s16_u16(vshll_n_u8(vext_u8(l, l18u, 7), 1)); |
|
|
|
l = l18u; |
|
|
|
int16x8_t l02 = vreinterpretq_s16_u16(vaddl_u8(l2, l0)); |
|
int16x8_t ldx = vreinterpretq_s16_u16(vsubl_u8(l2, l0)); |
|
int16x8_t ldy = vaddq_s16(l02, l1x2); |
|
|
|
vst1q_s16(dstx + i, ldx); |
|
vst1q_s16(dsty + i, ldy); |
|
} |
|
|
|
//tail |
|
if (lookRight == 0 || i != width) |
|
{ |
|
uint8x8_t tail0 = vld1_u8(src + (width - 9));//can't get left 1 pixel another way if width==8*k+1 |
|
uint8x8_t tail2 = vtbl1_u8(vld1_u8(src + (width - 8 + lookRight)), vtmask); |
|
uint8x8_t tail1 = vext_u8(vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(tail0), 8*6)), tail2, 7); |
|
|
|
int16x8_t tail02 = vreinterpretq_s16_u16(vaddl_u8(tail2, tail0)); |
|
int16x8_t tail1x2 = vreinterpretq_s16_u16(vshll_n_u8(tail1, 1)); |
|
int16x8_t taildx = vreinterpretq_s16_u16(vsubl_u8(tail2, tail0)); |
|
int16x8_t taildy = vqaddq_s16(tail02, tail1x2); |
|
|
|
vst1q_s16(dstx + (width - 8), taildx); |
|
vst1q_s16(dsty + (width - 8), taildy); |
|
} |
|
} |
|
|
|
uint8x8_t vfmask; |
|
uint8x8_t vtmask; |
|
enum { offsetk = 1}; |
|
ptrdiff_t lookLeft; |
|
ptrdiff_t lookRight; |
|
}; |
|
|
|
template <bool L2gradient> |
|
inline void ColFilter3x3Canny(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width) |
|
{ |
|
ptrdiff_t j = 0; |
|
for (; j <= width - 8; j += 8) |
|
{ |
|
ColFilter3x3CannyL1Loop: |
|
int16x8_t line0x = vld1q_s16(src0 + j); |
|
int16x8_t line1x = vld1q_s16(src1 + j); |
|
int16x8_t line2x = vld1q_s16(src2 + j); |
|
int16x8_t line0y = vld1q_s16(src0 + j + width); |
|
int16x8_t line2y = vld1q_s16(src2 + j + width); |
|
|
|
int16x8_t l02 = vaddq_s16(line0x, line2x); |
|
int16x8_t l1x2 = vshlq_n_s16(line1x, 1); |
|
int16x8_t dy = vsubq_s16(line2y, line0y); |
|
int16x8_t dx = vaddq_s16(l1x2, l02); |
|
|
|
int16x8_t dya = vabsq_s16(dy); |
|
int16x8_t dxa = vabsq_s16(dx); |
|
int16x8_t norm = vaddq_s16(dya, dxa); |
|
|
|
int32x4_t normh = vmovl_s16(vget_high_s16(norm)); |
|
int32x4_t norml = vmovl_s16(vget_low_s16(norm)); |
|
|
|
vst1q_s16(dsty + j, dy); |
|
vst1q_s16(dstx + j, dx); |
|
vst1q_s32(mag + j + 4, normh); |
|
vst1q_s32(mag + j, norml); |
|
} |
|
if (j != width) |
|
{ |
|
j = width - 8; |
|
goto ColFilter3x3CannyL1Loop; |
|
} |
|
} |
|
template <> |
|
inline void ColFilter3x3Canny<true>(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width) |
|
{ |
|
ptrdiff_t j = 0; |
|
for (; j <= width - 8; j += 8) |
|
{ |
|
ColFilter3x3CannyL2Loop: |
|
int16x8_t line0x = vld1q_s16(src0 + j); |
|
int16x8_t line1x = vld1q_s16(src1 + j); |
|
int16x8_t line2x = vld1q_s16(src2 + j); |
|
int16x8_t line0y = vld1q_s16(src0 + j + width); |
|
int16x8_t line2y = vld1q_s16(src2 + j + width); |
|
|
|
int16x8_t l02 = vaddq_s16(line0x, line2x); |
|
int16x8_t l1x2 = vshlq_n_s16(line1x, 1); |
|
int16x8_t dy = vsubq_s16(line2y, line0y); |
|
int16x8_t dx = vaddq_s16(l1x2, l02); |
|
|
|
int32x4_t norml = vmull_s16(vget_low_s16(dx), vget_low_s16(dx)); |
|
int32x4_t normh = vmull_s16(vget_high_s16(dy), vget_high_s16(dy)); |
|
|
|
norml = vmlal_s16(norml, vget_low_s16(dy), vget_low_s16(dy)); |
|
normh = vmlal_s16(normh, vget_high_s16(dx), vget_high_s16(dx)); |
|
|
|
vst1q_s16(dsty + j, dy); |
|
vst1q_s16(dstx + j, dx); |
|
vst1q_s32(mag + j, norml); |
|
vst1q_s32(mag + j + 4, normh); |
|
} |
|
if (j != width) |
|
{ |
|
j = width - 8; |
|
goto ColFilter3x3CannyL2Loop; |
|
} |
|
} |
|
|
|
template <bool L2gradient> |
|
inline void NormCanny(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm) |
|
{ |
|
ptrdiff_t j = 0; |
|
if (colscn >= 8) |
|
{ |
|
int16x8_t vx = vld1q_s16(_dx); |
|
int16x8_t vy = vld1q_s16(_dy); |
|
for (; j <= colscn - 16; j+=8) |
|
{ |
|
internal::prefetch(_dx); |
|
internal::prefetch(_dy); |
|
|
|
int16x8_t vx2 = vld1q_s16(_dx + j + 8); |
|
int16x8_t vy2 = vld1q_s16(_dy + j + 8); |
|
|
|
int16x8_t vabsx = vabsq_s16(vx); |
|
int16x8_t vabsy = vabsq_s16(vy); |
|
|
|
int16x8_t norm = vaddq_s16(vabsx, vabsy); |
|
|
|
int32x4_t normh = vmovl_s16(vget_high_s16(norm)); |
|
int32x4_t norml = vmovl_s16(vget_low_s16(norm)); |
|
|
|
vst1q_s32(_norm + j + 4, normh); |
|
vst1q_s32(_norm + j + 0, norml); |
|
|
|
vx = vx2; |
|
vy = vy2; |
|
} |
|
int16x8_t vabsx = vabsq_s16(vx); |
|
int16x8_t vabsy = vabsq_s16(vy); |
|
|
|
int16x8_t norm = vaddq_s16(vabsx, vabsy); |
|
|
|
int32x4_t normh = vmovl_s16(vget_high_s16(norm)); |
|
int32x4_t norml = vmovl_s16(vget_low_s16(norm)); |
|
|
|
vst1q_s32(_norm + j + 4, normh); |
|
vst1q_s32(_norm + j + 0, norml); |
|
} |
|
for (; j < colscn; j++) |
|
_norm[j] = std::abs(s32(_dx[j])) + std::abs(s32(_dy[j])); |
|
} |
|
|
|
template <> |
|
inline void NormCanny<true>(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm) |
|
{ |
|
ptrdiff_t j = 0; |
|
if (colscn >= 8) |
|
{ |
|
int16x8_t vx = vld1q_s16(_dx); |
|
int16x8_t vy = vld1q_s16(_dy); |
|
|
|
for (; j <= colscn - 16; j+=8) |
|
{ |
|
internal::prefetch(_dx); |
|
internal::prefetch(_dy); |
|
|
|
int16x8_t vxnext = vld1q_s16(_dx + j + 8); |
|
int16x8_t vynext = vld1q_s16(_dy + j + 8); |
|
|
|
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx)); |
|
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy)); |
|
|
|
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy)); |
|
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx)); |
|
|
|
vst1q_s32(_norm + j + 0, norml); |
|
vst1q_s32(_norm + j + 4, normh); |
|
|
|
vx = vxnext; |
|
vy = vynext; |
|
} |
|
int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx)); |
|
int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy)); |
|
|
|
norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy)); |
|
normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx)); |
|
|
|
vst1q_s32(_norm + j + 0, norml); |
|
vst1q_s32(_norm + j + 4, normh); |
|
} |
|
for (; j < colscn; j++) |
|
_norm[j] = s32(_dx[j])*_dx[j] + s32(_dy[j])*_dy[j]; |
|
} |
|
|
|
template <bool L2gradient> |
|
inline void prepareThresh(f64 low_thresh, f64 high_thresh, |
|
s32 &low, s32 &high) |
|
{ |
|
if (low_thresh > high_thresh) |
|
std::swap(low_thresh, high_thresh); |
|
#if defined __GNUC__ |
|
low = (s32)low_thresh; |
|
high = (s32)high_thresh; |
|
low -= (low > low_thresh); |
|
high -= (high > high_thresh); |
|
#else |
|
low = internal::round(low_thresh); |
|
high = internal::round(high_thresh); |
|
f32 ldiff = (f32)(low_thresh - low); |
|
f32 hdiff = (f32)(high_thresh - high); |
|
low -= (ldiff < 0); |
|
high -= (hdiff < 0); |
|
#endif |
|
} |
|
template <> |
|
inline void prepareThresh<true>(f64 low_thresh, f64 high_thresh, |
|
s32 &low, s32 &high) |
|
{ |
|
if (low_thresh > high_thresh) |
|
std::swap(low_thresh, high_thresh); |
|
if (low_thresh > 0) low_thresh *= low_thresh; |
|
if (high_thresh > 0) high_thresh *= high_thresh; |
|
#if defined __GNUC__ |
|
low = (s32)low_thresh; |
|
high = (s32)high_thresh; |
|
low -= (low > low_thresh); |
|
high -= (high > high_thresh); |
|
#else |
|
low = internal::round(low_thresh); |
|
high = internal::round(high_thresh); |
|
f32 ldiff = (f32)(low_thresh - low); |
|
f32 hdiff = (f32)(high_thresh - high); |
|
low -= (ldiff < 0); |
|
high -= (hdiff < 0); |
|
#endif |
|
} |
|
|
|
template <bool L2gradient, bool externalSobel> |
|
struct _normEstimator |
|
{ |
|
ptrdiff_t magstep; |
|
ptrdiff_t dxOffset; |
|
ptrdiff_t dyOffset; |
|
ptrdiff_t shxOffset; |
|
ptrdiff_t shyOffset; |
|
std::vector<u8> buffer; |
|
const ptrdiff_t offsetk; |
|
ptrdiff_t borderyt, borderyb; |
|
RowFilter3x3Canny sobelRow; |
|
|
|
inline _normEstimator(const Size2D &size, s32, Margin borderMargin, |
|
ptrdiff_t &mapstep, s32** mag_buf, u8* &map): |
|
offsetk(1), |
|
sobelRow(std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.left), |
|
std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.right)) |
|
{ |
|
mapstep = size.width + 2; |
|
magstep = size.width + 2 + size.width * (4 * sizeof(s16)/sizeof(s32)); |
|
dxOffset = mapstep * sizeof(s32)/sizeof(s16); |
|
dyOffset = dxOffset + size.width * 1; |
|
shxOffset = dxOffset + size.width * 2; |
|
shyOffset = dxOffset + size.width * 3; |
|
buffer.resize( (size.width+2)*(size.height+2) + magstep*3*sizeof(s32) ); |
|
mag_buf[0] = (s32*)&buffer[0]; |
|
mag_buf[1] = mag_buf[0] + magstep; |
|
mag_buf[2] = mag_buf[1] + magstep; |
|
memset(mag_buf[0], 0, mapstep * sizeof(s32)); |
|
|
|
map = (u8*)(mag_buf[2] + magstep); |
|
memset(map, 1, mapstep); |
|
memset(map + mapstep*(size.height + 1), 1, mapstep); |
|
borderyt = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.top); |
|
borderyb = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.bottom); |
|
} |
|
inline void firstRow(const Size2D &size, s32, |
|
const u8 *srcBase, ptrdiff_t srcStride, |
|
s16*, ptrdiff_t, |
|
s16*, ptrdiff_t, |
|
s32** mag_buf) |
|
{ |
|
//sobelH row #0 |
|
const u8* _src = internal::getRowPtr(srcBase, srcStride, 0); |
|
sobelRow(_src, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shyOffset, size.width); |
|
//sobelH row #1 |
|
_src = internal::getRowPtr(srcBase, srcStride, 1); |
|
sobelRow(_src, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shyOffset, size.width); |
|
|
|
mag_buf[1][0] = mag_buf[1][size.width+1] = 0; |
|
if (borderyt == 0) |
|
{ |
|
//sobelH row #-1 |
|
_src = internal::getRowPtr(srcBase, srcStride, -1); |
|
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width); |
|
|
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, |
|
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width); |
|
} |
|
else |
|
{ |
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, |
|
((s16*)mag_buf[1]) + dxOffset, ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width); |
|
} |
|
} |
|
inline void nextRow(const Size2D &size, s32, |
|
const u8 *srcBase, ptrdiff_t srcStride, |
|
s16*, ptrdiff_t, |
|
s16*, ptrdiff_t, |
|
const ptrdiff_t &mapstep, s32** mag_buf, |
|
size_t i, const s16* &_x, const s16* &_y) |
|
{ |
|
mag_buf[2][0] = mag_buf[2][size.width+1] = 0; |
|
if (i < size.height - borderyb) |
|
{ |
|
const u8* _src = internal::getRowPtr(srcBase, srcStride, i+1); |
|
//sobelH row #i+1 |
|
sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width); |
|
|
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[2]) + shxOffset, |
|
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width); |
|
} |
|
else if (i < size.height) |
|
{ |
|
ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, |
|
((s16*)mag_buf[2]) + dxOffset, ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width); |
|
} |
|
else |
|
memset(mag_buf[2], 0, mapstep*sizeof(s32)); |
|
_x = ((s16*)mag_buf[1]) + dxOffset; |
|
_y = ((s16*)mag_buf[1]) + dyOffset; |
|
} |
|
}; |
|
template <bool L2gradient> |
|
struct _normEstimator<L2gradient, true> |
|
{ |
|
std::vector<u8> buffer; |
|
|
|
inline _normEstimator(const Size2D &size, s32 cn, Margin, |
|
ptrdiff_t &mapstep, s32** mag_buf, u8* &map) |
|
{ |
|
mapstep = size.width + 2; |
|
buffer.resize( (size.width+2)*(size.height+2) + cn*mapstep*3*sizeof(s32) ); |
|
mag_buf[0] = (s32*)&buffer[0]; |
|
mag_buf[1] = mag_buf[0] + mapstep*cn; |
|
mag_buf[2] = mag_buf[1] + mapstep*cn; |
|
memset(mag_buf[0], 0, /* cn* */mapstep * sizeof(s32)); |
|
|
|
map = (u8*)(mag_buf[2] + mapstep*cn); |
|
memset(map, 1, mapstep); |
|
memset(map + mapstep*(size.height + 1), 1, mapstep); |
|
} |
|
inline void firstRow(const Size2D &size, s32 cn, |
|
const u8 *, ptrdiff_t, |
|
s16* dxBase, ptrdiff_t dxStride, |
|
s16* dyBase, ptrdiff_t dyStride, |
|
s32** mag_buf) |
|
{ |
|
s32* _norm = mag_buf[1] + 1; |
|
|
|
s16* _dx = internal::getRowPtr(dxBase, dxStride, 0); |
|
s16* _dy = internal::getRowPtr(dyBase, dyStride, 0); |
|
|
|
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm); |
|
|
|
if(cn > 1) |
|
{ |
|
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn) |
|
{ |
|
size_t maxIdx = jn; |
|
for(s32 k = 1; k < cn; ++k) |
|
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k; |
|
_norm[j] = _norm[maxIdx]; |
|
_dx[j] = _dx[maxIdx]; |
|
_dy[j] = _dy[maxIdx]; |
|
} |
|
} |
|
|
|
_norm[-1] = _norm[size.width] = 0; |
|
} |
|
inline void nextRow(const Size2D &size, s32 cn, |
|
const u8 *, ptrdiff_t, |
|
s16* dxBase, ptrdiff_t dxStride, |
|
s16* dyBase, ptrdiff_t dyStride, |
|
const ptrdiff_t &mapstep, s32** mag_buf, |
|
size_t i, const s16* &_x, const s16* &_y) |
|
{ |
|
s32* _norm = mag_buf[(i > 0) + 1] + 1; |
|
if (i < size.height) |
|
{ |
|
s16* _dx = internal::getRowPtr(dxBase, dxStride, i); |
|
s16* _dy = internal::getRowPtr(dyBase, dyStride, i); |
|
|
|
NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm); |
|
|
|
if(cn > 1) |
|
{ |
|
for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn) |
|
{ |
|
size_t maxIdx = jn; |
|
for(s32 k = 1; k < cn; ++k) |
|
if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k; |
|
_norm[j] = _norm[maxIdx]; |
|
_dx[j] = _dx[maxIdx]; |
|
_dy[j] = _dy[maxIdx]; |
|
} |
|
} |
|
|
|
_norm[-1] = _norm[size.width] = 0; |
|
} |
|
else |
|
memset(_norm-1, 0, /* cn* */mapstep*sizeof(s32)); |
|
|
|
_x = internal::getRowPtr(dxBase, dxStride, i-1); |
|
_y = internal::getRowPtr(dyBase, dyStride, i-1); |
|
} |
|
}; |
|
|
|
template <bool L2gradient, bool externalSobel> |
|
inline void Canny3x3(const Size2D &size, s32 cn, |
|
const u8 * srcBase, ptrdiff_t srcStride, |
|
u8 * dstBase, ptrdiff_t dstStride, |
|
s16 * dxBase, ptrdiff_t dxStride, |
|
s16 * dyBase, ptrdiff_t dyStride, |
|
f64 low_thresh, f64 high_thresh, |
|
Margin borderMargin) |
|
{ |
|
s32 low, high; |
|
prepareThresh<L2gradient>(low_thresh, high_thresh, low, high); |
|
|
|
ptrdiff_t mapstep; |
|
s32* mag_buf[3]; |
|
u8* map; |
|
_normEstimator<L2gradient, externalSobel> normEstimator(size, cn, borderMargin, mapstep, mag_buf, map); |
|
|
|
size_t maxsize = std::max<size_t>( 1u << 10, size.width * size.height / 10 ); |
|
std::vector<u8*> stack( maxsize ); |
|
u8 **stack_top = &stack[0]; |
|
u8 **stack_bottom = &stack[0]; |
|
|
|
/* sector numbers |
|
(Top-Left Origin) |
|
|
|
1 2 3 |
|
* * * |
|
* * * |
|
0*******0 |
|
* * * |
|
* * * |
|
3 2 1 |
|
*/ |
|
|
|
#define CANNY_PUSH(d) *(d) = u8(2), *stack_top++ = (d) |
|
#define CANNY_POP(d) (d) = *--stack_top |
|
|
|
//i == 0 |
|
normEstimator.firstRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mag_buf); |
|
// calculate magnitude and angle of gradient, perform non-maxima supression. |
|
// fill the map with one of the following values: |
|
// 0 - the pixel might belong to an edge |
|
// 1 - the pixel can not belong to an edge |
|
// 2 - the pixel does belong to an edge |
|
for (size_t i = 1; i <= size.height; i++) |
|
{ |
|
const s16 *_x, *_y; |
|
normEstimator.nextRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mapstep, mag_buf, i, _x, _y); |
|
|
|
u8* _map = map + mapstep*i + 1; |
|
_map[-1] = _map[size.width] = 1; |
|
|
|
s32* _mag = mag_buf[1] + 1; // take the central row |
|
ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1]; |
|
ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1]; |
|
|
|
if ((stack_top - stack_bottom) + size.width > maxsize) |
|
{ |
|
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom); |
|
maxsize = maxsize * 3/2; |
|
stack.resize(maxsize); |
|
stack_bottom = &stack[0]; |
|
stack_top = stack_bottom + sz; |
|
} |
|
|
|
s32 prev_flag = 0; |
|
for (ptrdiff_t j = 0; j < (ptrdiff_t)size.width; j++) |
|
{ |
|
#define CANNY_SHIFT 15 |
|
const s32 TG22 = (s32)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5); |
|
|
|
s32 m = _mag[j]; |
|
|
|
if (m > low) |
|
{ |
|
s32 xs = _x[j]; |
|
s32 ys = _y[j]; |
|
s32 x = abs(xs); |
|
s32 y = abs(ys) << CANNY_SHIFT; |
|
|
|
s32 tg22x = x * TG22; |
|
|
|
if (y < tg22x) |
|
{ |
|
if (m > _mag[j-1] && m >= _mag[j+1]) goto __push; |
|
} |
|
else |
|
{ |
|
s32 tg67x = tg22x + (x << (CANNY_SHIFT+1)); |
|
if (y > tg67x) |
|
{ |
|
if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __push; |
|
} |
|
else |
|
{ |
|
s32 s = (xs ^ ys) < 0 ? -1 : 1; |
|
if(m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __push; |
|
} |
|
} |
|
} |
|
prev_flag = 0; |
|
_map[j] = u8(1); |
|
continue; |
|
__push: |
|
if (!prev_flag && m > high && _map[j-mapstep] != 2) |
|
{ |
|
CANNY_PUSH(_map + j); |
|
prev_flag = 1; |
|
} |
|
else |
|
_map[j] = 0; |
|
} |
|
|
|
// scroll the ring buffer |
|
_mag = mag_buf[0]; |
|
mag_buf[0] = mag_buf[1]; |
|
mag_buf[1] = mag_buf[2]; |
|
mag_buf[2] = _mag; |
|
} |
|
|
|
// now track the edges (hysteresis thresholding) |
|
while (stack_top > stack_bottom) |
|
{ |
|
u8* m; |
|
if ((size_t)(stack_top - stack_bottom) + 8u > maxsize) |
|
{ |
|
ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom); |
|
maxsize = maxsize * 3/2; |
|
stack.resize(maxsize); |
|
stack_bottom = &stack[0]; |
|
stack_top = stack_bottom + sz; |
|
} |
|
|
|
CANNY_POP(m); |
|
|
|
if (!m[-1]) CANNY_PUSH(m - 1); |
|
if (!m[1]) CANNY_PUSH(m + 1); |
|
if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1); |
|
if (!m[-mapstep]) CANNY_PUSH(m - mapstep); |
|
if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1); |
|
if (!m[mapstep-1]) CANNY_PUSH(m + mapstep - 1); |
|
if (!m[mapstep]) CANNY_PUSH(m + mapstep); |
|
if (!m[mapstep+1]) CANNY_PUSH(m + mapstep + 1); |
|
} |
|
|
|
// the final pass, form the final image |
|
uint8x16_t v2 = vmovq_n_u8(2); |
|
const u8* ptrmap = map + mapstep + 1; |
|
for (size_t i = 0; i < size.height; i++, ptrmap += mapstep) |
|
{ |
|
u8* _dst = internal::getRowPtr(dstBase, dstStride, i); |
|
ptrdiff_t j = 0; |
|
for (; j < (ptrdiff_t)size.width - 16; j += 16) |
|
{ |
|
internal::prefetch(ptrmap); |
|
uint8x16_t vmap = vld1q_u8(ptrmap + j); |
|
uint8x16_t vdst = vceqq_u8(vmap, v2); |
|
vst1q_u8(_dst+j, vdst); |
|
} |
|
for (; j < (ptrdiff_t)size.width; j++) |
|
_dst[j] = (u8)-(ptrmap[j] >> 1); |
|
} |
|
} |
|
|
|
} // namespace |
|
#endif |
|
|
|
bool isCanny3x3Supported(const Size2D &size) |
|
{ |
|
return isSupportedConfiguration() && |
|
size.height >= 2 && size.width >= 9; |
|
} |
|
|
|
void Canny3x3L1(const Size2D &size, |
|
const u8 * srcBase, ptrdiff_t srcStride, |
|
u8 * dstBase, ptrdiff_t dstStride, |
|
f64 low_thresh, f64 high_thresh, |
|
Margin borderMargin) |
|
{ |
|
internal::assertSupportedConfiguration(isCanny3x3Supported(size)); |
|
#ifdef CAROTENE_NEON |
|
Canny3x3<false, false>(size, 1, |
|
srcBase, srcStride, |
|
dstBase, dstStride, |
|
NULL, 0, |
|
NULL, 0, |
|
low_thresh, high_thresh, |
|
borderMargin); |
|
#else |
|
(void)size; |
|
(void)srcBase; |
|
(void)srcStride; |
|
(void)dstBase; |
|
(void)dstStride; |
|
(void)low_thresh; |
|
(void)high_thresh; |
|
(void)borderMargin; |
|
#endif |
|
} |
|
|
|
void Canny3x3L2(const Size2D &size, |
|
const u8 * srcBase, ptrdiff_t srcStride, |
|
u8 * dstBase, ptrdiff_t dstStride, |
|
f64 low_thresh, f64 high_thresh, |
|
Margin borderMargin) |
|
{ |
|
internal::assertSupportedConfiguration(isCanny3x3Supported(size)); |
|
#ifdef CAROTENE_NEON |
|
Canny3x3<true, false>(size, 1, |
|
srcBase, srcStride, |
|
dstBase, dstStride, |
|
NULL, 0, |
|
NULL, 0, |
|
low_thresh, high_thresh, |
|
borderMargin); |
|
#else |
|
(void)size; |
|
(void)srcBase; |
|
(void)srcStride; |
|
(void)dstBase; |
|
(void)dstStride; |
|
(void)low_thresh; |
|
(void)high_thresh; |
|
(void)borderMargin; |
|
#endif |
|
} |
|
|
|
void Canny3x3L1(const Size2D &size, s32 cn, |
|
s16 * dxBase, ptrdiff_t dxStride, |
|
s16 * dyBase, ptrdiff_t dyStride, |
|
u8 * dstBase, ptrdiff_t dstStride, |
|
f64 low_thresh, f64 high_thresh) |
|
{ |
|
internal::assertSupportedConfiguration(); |
|
#ifdef CAROTENE_NEON |
|
Canny3x3<false, true>(size, cn, |
|
NULL, 0, |
|
dstBase, dstStride, |
|
dxBase, dxStride, |
|
dyBase, dyStride, |
|
low_thresh, high_thresh, |
|
Margin()); |
|
#else |
|
(void)size; |
|
(void)cn; |
|
(void)dstBase; |
|
(void)dstStride; |
|
(void)dxBase; |
|
(void)dxStride; |
|
(void)dyBase; |
|
(void)dyStride; |
|
(void)low_thresh; |
|
(void)high_thresh; |
|
#endif |
|
} |
|
|
|
void Canny3x3L2(const Size2D &size, s32 cn, |
|
s16 * dxBase, ptrdiff_t dxStride, |
|
s16 * dyBase, ptrdiff_t dyStride, |
|
u8 * dstBase, ptrdiff_t dstStride, |
|
f64 low_thresh, f64 high_thresh) |
|
{ |
|
internal::assertSupportedConfiguration(); |
|
#ifdef CAROTENE_NEON |
|
Canny3x3<true, true>(size, cn, |
|
NULL, 0, |
|
dstBase, dstStride, |
|
dxBase, dxStride, |
|
dyBase, dyStride, |
|
low_thresh, high_thresh, |
|
Margin()); |
|
#else |
|
(void)size; |
|
(void)cn; |
|
(void)dstBase; |
|
(void)dstStride; |
|
(void)dxBase; |
|
(void)dxStride; |
|
(void)dyBase; |
|
(void)dyStride; |
|
(void)low_thresh; |
|
(void)high_thresh; |
|
#endif |
|
} |
|
|
|
} // namespace CAROTENE_NS
|
|
|