|
|
|
@ -43,518 +43,431 @@ |
|
|
|
|
// |
|
|
|
|
//M*/ |
|
|
|
|
|
|
|
|
|
#ifdef OP_SOBEL |
|
|
|
|
#define TG22 0.4142135623730950488016887242097f |
|
|
|
|
#define TG67 2.4142135623730950488016887242097f |
|
|
|
|
|
|
|
|
|
#if cn != 3 |
|
|
|
|
#define loadpix(addr) convertToIntT(*(__global const ucharT *)(addr)) |
|
|
|
|
#define storepix(val, addr) *(__global shortT *)(addr) = convertToShortT(val) |
|
|
|
|
#define shortSize (int)sizeof(shortT) |
|
|
|
|
#ifdef WITH_SOBEL |
|
|
|
|
|
|
|
|
|
#if cn == 1 |
|
|
|
|
#define loadpix(addr) convert_intN(*(__global const TYPE *)(addr)) |
|
|
|
|
#else |
|
|
|
|
#define loadpix(addr) convertToIntT(vload3(0, (__global const uchar *)(addr))) |
|
|
|
|
#define storepix(val, addr) vstore3(convertToShortT(val), 0, (__global short *)(addr)) |
|
|
|
|
#define shortSize (int)sizeof(short) * cn |
|
|
|
|
#define loadpix(addr) convert_intN(vload3(0, (__global const TYPE *)(addr))) |
|
|
|
|
#endif |
|
|
|
|
#define storepix(value, addr) *(__global int *)(addr) = (int)(value) |
|
|
|
|
|
|
|
|
|
/* |
|
|
|
|
stage1_with_sobel: |
|
|
|
|
Sobel operator |
|
|
|
|
Calc magnitudes |
|
|
|
|
Non maxima suppression |
|
|
|
|
Double thresholding |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
__constant int prev[4][2] = { |
|
|
|
|
{ 0, -1 }, |
|
|
|
|
{ -1, 1 }, |
|
|
|
|
{ -1, 0 }, |
|
|
|
|
{ -1, -1 } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
// Smoothing perpendicular to the derivative direction with a triangle filter |
|
|
|
|
// only support 3x3 Sobel kernel |
|
|
|
|
// h (-1) = 1, h (0) = 2, h (1) = 1 |
|
|
|
|
// h'(-1) = -1, h'(0) = 0, h'(1) = 1 |
|
|
|
|
// thus sobel 2D operator can be calculated as: |
|
|
|
|
// h'(x, y) = h'(x)h(y) for x direction |
|
|
|
|
// |
|
|
|
|
// src input 8bit single channel image data |
|
|
|
|
// dx_buf output dx buffer |
|
|
|
|
// dy_buf output dy buffer |
|
|
|
|
__constant int next[4][2] = { |
|
|
|
|
{ 0, 1 }, |
|
|
|
|
{ 1, -1 }, |
|
|
|
|
{ 1, 0 }, |
|
|
|
|
{ 1, 1 } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
__kernel void calcSobelRowPass(__global const uchar * src, int src_step, int src_offset, int rows, int cols, |
|
|
|
|
__global uchar * dx_buf, int dx_buf_step, int dx_buf_offset, |
|
|
|
|
__global uchar * dy_buf, int dy_buf_step, int dy_buf_offset) |
|
|
|
|
inline int3 sobel(int idx, __local const intN *smem) |
|
|
|
|
{ |
|
|
|
|
int gidx = get_global_id(0); |
|
|
|
|
int gidy = get_global_id(1); |
|
|
|
|
// result: x, y, mag |
|
|
|
|
int3 res; |
|
|
|
|
|
|
|
|
|
int lidx = get_local_id(0); |
|
|
|
|
int lidy = get_local_id(1); |
|
|
|
|
intN dx = smem[idx + 2] - smem[idx] |
|
|
|
|
+ 2 * (smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4]) |
|
|
|
|
+ smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]; |
|
|
|
|
|
|
|
|
|
__local intT smem[16][18]; |
|
|
|
|
intN dy = smem[idx] - smem[idx + 2 * GRP_SIZEX + 8] |
|
|
|
|
+ 2 * (smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9]) |
|
|
|
|
+ smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10]; |
|
|
|
|
|
|
|
|
|
smem[lidy][lidx + 1] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(gidx, cn, src_offset))); |
|
|
|
|
if (lidx == 0) |
|
|
|
|
#ifdef L2GRAD |
|
|
|
|
intN magN = dx * dx + dy * dy; |
|
|
|
|
#else |
|
|
|
|
intN magN = convert_intN(abs(dx) + abs(dy)); |
|
|
|
|
#endif |
|
|
|
|
#if cn == 1 |
|
|
|
|
res.z = magN; |
|
|
|
|
res.x = dx; |
|
|
|
|
res.y = dy; |
|
|
|
|
#else |
|
|
|
|
res.z = max(magN.x, max(magN.y, magN.z)); |
|
|
|
|
if (res.z == magN.y) |
|
|
|
|
{ |
|
|
|
|
smem[lidy][0] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(max(gidx - 1, 0), cn, src_offset))); |
|
|
|
|
smem[lidy][17] = loadpix(src + mad24(src_step, min(gidy, rows - 1), mad24(min(gidx + 16, cols - 1), cn, src_offset))); |
|
|
|
|
dx.x = dx.y; |
|
|
|
|
dy.x = dy.y; |
|
|
|
|
} |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
if (gidy < rows && gidx < cols) |
|
|
|
|
else if (res.z == magN.z) |
|
|
|
|
{ |
|
|
|
|
storepix(smem[lidy][lidx + 2] - smem[lidy][lidx], |
|
|
|
|
dx_buf + mad24(gidy, dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); |
|
|
|
|
storepix(mad24(2, smem[lidy][lidx + 1], smem[lidy][lidx] + smem[lidy][lidx + 2]), |
|
|
|
|
dy_buf + mad24(gidy, dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); |
|
|
|
|
dx.x = dx.z; |
|
|
|
|
dy.x = dy.z; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#elif defined OP_MAG_BUF || defined OP_MAG |
|
|
|
|
|
|
|
|
|
inline intT calc(shortT x, shortT y) |
|
|
|
|
{ |
|
|
|
|
#ifdef L2GRAD |
|
|
|
|
intT intx = convertToIntT(x), inty = convertToIntT(y); |
|
|
|
|
return intx * intx + inty * inty; |
|
|
|
|
#else |
|
|
|
|
return convertToIntT( (x >= (shortT)(0) ? x : -x) + (y >= (shortT)(0) ? y : -y) ); |
|
|
|
|
res.x = dx.x; |
|
|
|
|
res.y = dy.x; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#ifdef OP_MAG |
|
|
|
|
return res; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// calculate the magnitude of the filter pass combining both x and y directions |
|
|
|
|
// This is the non-buffered version(non-3x3 sobel) |
|
|
|
|
// |
|
|
|
|
// dx_buf dx buffer, calculated from calcSobelRowPass |
|
|
|
|
// dy_buf dy buffer, calculated from calcSobelRowPass |
|
|
|
|
// dx direvitive in x direction output |
|
|
|
|
// dy direvitive in y direction output |
|
|
|
|
// mag magnitude direvitive of xy output |
|
|
|
|
|
|
|
|
|
__kernel void calcMagnitude(__global const uchar * dxptr, int dx_step, int dx_offset, |
|
|
|
|
__global const uchar * dyptr, int dy_step, int dy_offset, |
|
|
|
|
__global uchar * magptr, int mag_step, int mag_offset, int rows, int cols) |
|
|
|
|
__kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src_offset, int rows, int cols, |
|
|
|
|
__global uchar *map, int map_step, int map_offset, |
|
|
|
|
int low_thr, int high_thr) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
__local intN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)]; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
{ |
|
|
|
|
int dx_index = mad24(dx_step, y, mad24(x, (int)sizeof(short) * cn, dx_offset)); |
|
|
|
|
int dy_index = mad24(dy_step, y, mad24(x, (int)sizeof(short) * cn, dy_offset)); |
|
|
|
|
int mag_index = mad24(mag_step, y + 1, mad24(x + 1, (int)sizeof(int), mag_offset)); |
|
|
|
|
|
|
|
|
|
__global short * dx = (__global short *)(dxptr + dx_index); |
|
|
|
|
__global short * dy = (__global short *)(dyptr + dy_index); |
|
|
|
|
__global int * mag = (__global int *)(magptr + mag_index); |
|
|
|
|
int lidx = get_local_id(0); |
|
|
|
|
int lidy = get_local_id(1); |
|
|
|
|
|
|
|
|
|
int cmag = calc(dx[0], dy[0]); |
|
|
|
|
#if cn > 1 |
|
|
|
|
short cx = dx[0], cy = dy[0]; |
|
|
|
|
int pmag; |
|
|
|
|
int start_x = GRP_SIZEX * get_group_id(0); |
|
|
|
|
int start_y = GRP_SIZEY * get_group_id(1); |
|
|
|
|
|
|
|
|
|
#pragma unroll |
|
|
|
|
for (int i = 1; i < cn; ++i) |
|
|
|
|
int i = lidx + lidy * GRP_SIZEX; |
|
|
|
|
for (int j = i; j < (GRP_SIZEX + 4) * (GRP_SIZEY + 4); j += GRP_SIZEX * GRP_SIZEY) |
|
|
|
|
{ |
|
|
|
|
pmag = calc(dx[i], dy[i]); |
|
|
|
|
if (pmag > cmag) |
|
|
|
|
cmag = pmag, cx = dx[i], cy = dy[i]; |
|
|
|
|
int x = clamp(start_x - 2 + (j % (GRP_SIZEX + 4)), 0, cols - 1); |
|
|
|
|
int y = clamp(start_y - 2 + (j / (GRP_SIZEX + 4)), 0, rows - 1); |
|
|
|
|
smem[j] = loadpix(src + mad24(y, src_step, mad24(x, cn * (int)sizeof(TYPE), src_offset))); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
dx[0] = cx, dy[0] = cy; |
|
|
|
|
#endif |
|
|
|
|
mag[0] = cmag; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
#elif defined OP_MAG_BUF |
|
|
|
|
//// Sobel, Magnitude |
|
|
|
|
// |
|
|
|
|
|
|
|
|
|
#if cn != 3 |
|
|
|
|
#define loadpix(addr) *(__global const shortT *)(addr) |
|
|
|
|
#define shortSize (int)sizeof(shortT) |
|
|
|
|
#else |
|
|
|
|
#define loadpix(addr) vload3(0, (__global const short *)(addr)) |
|
|
|
|
#define shortSize (int)sizeof(short)*cn |
|
|
|
|
#endif |
|
|
|
|
__local int mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)]; |
|
|
|
|
|
|
|
|
|
// calculate the magnitude of the filter pass combining both x and y directions |
|
|
|
|
// This is the buffered version(3x3 sobel) |
|
|
|
|
// |
|
|
|
|
// dx_buf dx buffer, calculated from calcSobelRowPass |
|
|
|
|
// dy_buf dy buffer, calculated from calcSobelRowPass |
|
|
|
|
// dx direvitive in x direction output |
|
|
|
|
// dy direvitive in y direction output |
|
|
|
|
// mag magnitude direvitive of xy output |
|
|
|
|
__kernel void calcMagnitude_buf(__global const uchar * dx_buf, int dx_buf_step, int dx_buf_offset, |
|
|
|
|
__global const uchar * dy_buf, int dy_buf_step, int dy_buf_offset, |
|
|
|
|
__global uchar * dx, int dx_step, int dx_offset, |
|
|
|
|
__global uchar * dy, int dy_step, int dy_offset, |
|
|
|
|
__global uchar * mag, int mag_step, int mag_offset, int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int gidx = get_global_id(0); |
|
|
|
|
int gidy = get_global_id(1); |
|
|
|
|
lidx++; |
|
|
|
|
lidy++; |
|
|
|
|
|
|
|
|
|
int lidx = get_local_id(0); |
|
|
|
|
int lidy = get_local_id(1); |
|
|
|
|
if (i < GRP_SIZEX + 2) |
|
|
|
|
{ |
|
|
|
|
int grp_sizey = min(GRP_SIZEY + 1, rows - start_y); |
|
|
|
|
mag[i] = (sobel(i, smem)).z; |
|
|
|
|
mag[i + grp_sizey * (GRP_SIZEX + 2)] = (sobel(i + grp_sizey * (GRP_SIZEX + 4), smem)).z; |
|
|
|
|
} |
|
|
|
|
if (i < GRP_SIZEY + 2) |
|
|
|
|
{ |
|
|
|
|
int grp_sizex = min(GRP_SIZEX + 1, cols - start_x); |
|
|
|
|
mag[i * (GRP_SIZEX + 2)] = (sobel(i * (GRP_SIZEX + 4), smem)).z; |
|
|
|
|
mag[i * (GRP_SIZEX + 2) + grp_sizex] = (sobel(i * (GRP_SIZEX + 4) + grp_sizex, smem)).z; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__local shortT sdx[18][16]; |
|
|
|
|
__local shortT sdy[18][16]; |
|
|
|
|
int idx = lidx + lidy * (GRP_SIZEX + 4); |
|
|
|
|
i = lidx + lidy * (GRP_SIZEX + 2); |
|
|
|
|
|
|
|
|
|
sdx[lidy + 1][lidx] = loadpix(dx_buf + mad24(min(gidy, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); |
|
|
|
|
sdy[lidy + 1][lidx] = loadpix(dy_buf + mad24(min(gidy, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); |
|
|
|
|
if (lidy == 0) |
|
|
|
|
{ |
|
|
|
|
sdx[0][lidx] = loadpix(dx_buf + mad24(clamp(gidy - 1, 0, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); |
|
|
|
|
sdx[17][lidx] = loadpix(dx_buf + mad24(min(gidy + 16, rows - 1), dx_buf_step, mad24(gidx, shortSize, dx_buf_offset))); |
|
|
|
|
int3 res = sobel(idx, smem); |
|
|
|
|
mag[i] = res.z; |
|
|
|
|
int x = res.x; |
|
|
|
|
int y = res.y; |
|
|
|
|
|
|
|
|
|
sdy[0][lidx] = loadpix(dy_buf + mad24(clamp(gidy - 1, 0, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); |
|
|
|
|
sdy[17][lidx] = loadpix(dy_buf + mad24(min(gidy + 16, rows - 1), dy_buf_step, mad24(gidx, shortSize, dy_buf_offset))); |
|
|
|
|
} |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
if (gidx < cols && gidy < rows) |
|
|
|
|
{ |
|
|
|
|
shortT x = sdx[lidy + 1][lidx] * (shortT)(2) + sdx[lidy][lidx] + sdx[lidy + 2][lidx]; |
|
|
|
|
shortT y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx]; |
|
|
|
|
//// Threshold + Non maxima suppression |
|
|
|
|
// |
|
|
|
|
|
|
|
|
|
#if cn == 1 |
|
|
|
|
*(__global short *)(dx + mad24(gidy, dx_step, mad24(gidx, shortSize, dx_offset))) = x; |
|
|
|
|
*(__global short *)(dy + mad24(gidy, dy_step, mad24(gidx, shortSize, dy_offset))) = y; |
|
|
|
|
/* |
|
|
|
|
Sector numbers |
|
|
|
|
|
|
|
|
|
*(__global int *)(mag + mad24(gidy + 1, mag_step, mad24(gidx + 1, (int)sizeof(int), mag_offset))) = calc(x, y); |
|
|
|
|
#elif cn == 3 |
|
|
|
|
intT magv = calc(x, y); |
|
|
|
|
short cx = x.x, cy = y.x; |
|
|
|
|
int cmag = magv.x; |
|
|
|
|
3 2 1 |
|
|
|
|
* * * |
|
|
|
|
* * * |
|
|
|
|
0*******0 |
|
|
|
|
* * * |
|
|
|
|
* * * |
|
|
|
|
1 2 3 |
|
|
|
|
|
|
|
|
|
if (cmag < magv.y) |
|
|
|
|
cx = x.y, cy = y.y, cmag = magv.y; |
|
|
|
|
if (cmag < magv.z) |
|
|
|
|
cx = x.z, cy = y.z, cmag = magv.z; |
|
|
|
|
We need to determine arctg(dy / dx) to one of the four directions: 0, 45, 90 or 135 degrees. |
|
|
|
|
Therefore if abs(dy / dx) belongs to the interval |
|
|
|
|
[0, tg(22.5)] -> 0 direction |
|
|
|
|
[tg(22.5), tg(67.5)] -> 1 or 3 |
|
|
|
|
[tg(67,5), +oo) -> 2 |
|
|
|
|
|
|
|
|
|
*(__global short *)(dx + mad24(gidy, dx_step, mad24(gidx, shortSize, dx_offset))) = cx; |
|
|
|
|
*(__global short *)(dy + mad24(gidy, dy_step, mad24(gidx, shortSize, dy_offset))) = cy; |
|
|
|
|
Since tg(67.5) = 1 / tg(22.5), if we take |
|
|
|
|
a = abs(dy / dx) * tg(22.5) and b = abs(dy / dx) * tg(67.5) |
|
|
|
|
we can get another intervals |
|
|
|
|
|
|
|
|
|
*(__global int *)(mag + mad24(gidy + 1, mag_step, mad24(gidx + 1, (int)sizeof(int), mag_offset))) = cmag; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
in case a: |
|
|
|
|
[0, tg(22.5)^2] -> 0 |
|
|
|
|
[tg(22.5)^2, 1] -> 1, 3 |
|
|
|
|
[1, +oo) -> 2 |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|
in case b: |
|
|
|
|
[0, 1] -> 0 |
|
|
|
|
[1, tg(67.5)^2] -> 1,3 |
|
|
|
|
[tg(67.5)^2, +oo) -> 2 |
|
|
|
|
|
|
|
|
|
#elif defined OP_MAP |
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////// |
|
|
|
|
// 0.4142135623730950488016887242097 is tan(22.5) |
|
|
|
|
|
|
|
|
|
#define CANNY_SHIFT 15 |
|
|
|
|
#define TG22 (int)(0.4142135623730950488016887242097f*(1<<CANNY_SHIFT) + 0.5f) |
|
|
|
|
|
|
|
|
|
// First pass of edge detection and non-maximum suppression |
|
|
|
|
// edgetype is set to for each pixel: |
|
|
|
|
// 0 - below low thres, not an edge |
|
|
|
|
// 1 - maybe an edge |
|
|
|
|
// 2 - is an edge, either magnitude is greater than high thres, or |
|
|
|
|
// Given estimates of the image gradients, a search is then carried out |
|
|
|
|
// to determine if the gradient magnitude assumes a local maximum in the gradient direction. |
|
|
|
|
// if the rounded gradient angle is zero degrees (i.e. the edge is in the north-south direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the west and east directions, |
|
|
|
|
// if the rounded gradient angle is 90 degrees (i.e. the edge is in the east-west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north and south directions, |
|
|
|
|
// if the rounded gradient angle is 135 degrees (i.e. the edge is in the north east-south west direction) the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north west and south east directions, |
|
|
|
|
// if the rounded gradient angle is 45 degrees (i.e. the edge is in the north west-south east direction)the point will be considered to be on the edge if its gradient magnitude is greater than the magnitudes in the north east and south west directions. |
|
|
|
|
// |
|
|
|
|
// dx, dy direvitives of x and y direction |
|
|
|
|
// mag magnitudes calculated from calcMagnitude function |
|
|
|
|
// map output containing raw edge types |
|
|
|
|
|
|
|
|
|
__kernel void calcMap(__global const uchar * dx, int dx_step, int dx_offset, |
|
|
|
|
__global const uchar * dy, int dy_step, int dy_offset, |
|
|
|
|
__global const uchar * mag, int mag_step, int mag_offset, |
|
|
|
|
__global uchar * map, int map_step, int map_offset, |
|
|
|
|
int rows, int cols, int low_thresh, int high_thresh) |
|
|
|
|
{ |
|
|
|
|
__local int smem[18][18]; |
|
|
|
|
that can help to find direction without conditions. |
|
|
|
|
|
|
|
|
|
0 - might belong to an edge |
|
|
|
|
1 - pixel doesn't belong to an edge |
|
|
|
|
2 - belong to an edge |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
int gidx = get_global_id(0); |
|
|
|
|
int gidy = get_global_id(1); |
|
|
|
|
|
|
|
|
|
int lidx = get_local_id(0); |
|
|
|
|
int lidy = get_local_id(1); |
|
|
|
|
|
|
|
|
|
int grp_idx = get_global_id(0) & 0xFFFFF0; |
|
|
|
|
int grp_idy = get_global_id(1) & 0xFFFFF0; |
|
|
|
|
|
|
|
|
|
int tid = mad24(lidy, 16, lidx); |
|
|
|
|
int lx = tid % 18; |
|
|
|
|
int ly = tid / 18; |
|
|
|
|
if (gidx >= cols || gidy >= rows) |
|
|
|
|
return; |
|
|
|
|
|
|
|
|
|
mag += mag_offset; |
|
|
|
|
if (ly < 14) |
|
|
|
|
smem[ly][lx] = *(__global const int *)(mag + |
|
|
|
|
mad24(mag_step, min(grp_idy + ly, rows - 1), (int)sizeof(int) * (grp_idx + lx))); |
|
|
|
|
if (ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols) |
|
|
|
|
smem[ly + 14][lx] = *(__global const int *)(mag + |
|
|
|
|
mad24(mag_step, min(grp_idy + ly + 14, rows - 1), (int)sizeof(int) * (grp_idx + lx))); |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
int mag0 = mag[i]; |
|
|
|
|
|
|
|
|
|
if (gidy < rows && gidx < cols) |
|
|
|
|
int value = 1; |
|
|
|
|
if (mag0 > low_thr) |
|
|
|
|
{ |
|
|
|
|
// 0 - the pixel can not belong to an edge |
|
|
|
|
// 1 - the pixel might belong to an edge |
|
|
|
|
// 2 - the pixel does belong to an edge |
|
|
|
|
int edge_type = 0; |
|
|
|
|
int m = smem[lidy + 1][lidx + 1]; |
|
|
|
|
int a = (y / (float)x) * TG22; |
|
|
|
|
int b = (y / (float)x) * TG67; |
|
|
|
|
|
|
|
|
|
if (m > low_thresh) |
|
|
|
|
{ |
|
|
|
|
short xs = *(__global const short *)(dx + mad24(gidy, dx_step, mad24(gidx, (int)sizeof(short) * cn, dx_offset))); |
|
|
|
|
short ys = *(__global const short *)(dy + mad24(gidy, dy_step, mad24(gidx, (int)sizeof(short) * cn, dy_offset))); |
|
|
|
|
int x = abs(xs), y = abs(ys); |
|
|
|
|
a = min((int)abs(a), 1) + 1; |
|
|
|
|
b = min((int)abs(b), 1); |
|
|
|
|
|
|
|
|
|
int tg22x = x * TG22; |
|
|
|
|
y <<= CANNY_SHIFT; |
|
|
|
|
// a = { 1, 2 } |
|
|
|
|
// b = { 0, 1 } |
|
|
|
|
// a * b = { 0, 1, 2 } - directions that we need ( + 3 if x ^ y < 0) |
|
|
|
|
|
|
|
|
|
if (y < tg22x) |
|
|
|
|
{ |
|
|
|
|
if (m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2]) |
|
|
|
|
edge_type = 1 + (int)(m > high_thresh); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
int tg67x = tg22x + (x << (1 + CANNY_SHIFT)); |
|
|
|
|
if (y > tg67x) |
|
|
|
|
{ |
|
|
|
|
if (m > smem[lidy][lidx + 1]&& m >= smem[lidy + 2][lidx + 1]) |
|
|
|
|
edge_type = 1 + (int)(m > high_thresh); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31); // if a = 1, b = 1, dy ^ dx < 0 |
|
|
|
|
int dir = a * b + 2 * dir3; |
|
|
|
|
int prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]]; |
|
|
|
|
int next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1); |
|
|
|
|
|
|
|
|
|
if (mag0 > prev_mag && mag0 >= next_mag) |
|
|
|
|
{ |
|
|
|
|
int s = (xs ^ ys) < 0 ? -1 : 1; |
|
|
|
|
if (m > smem[lidy][lidx + 1 - s]&& m > smem[lidy + 2][lidx + 1 + s]) |
|
|
|
|
edge_type = 1 + (int)(m > high_thresh); |
|
|
|
|
} |
|
|
|
|
value = (mag0 > high_thr) ? 2 : 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
*(__global int *)(map + mad24(map_step, gidy + 1, mad24(gidx + 1, (int)sizeof(int), + map_offset))) = edge_type; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset))); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#undef CANNY_SHIFT |
|
|
|
|
#undef TG22 |
|
|
|
|
#elif defined WITHOUT_SOBEL |
|
|
|
|
|
|
|
|
|
#elif defined OP_HYST_LOCAL |
|
|
|
|
/* |
|
|
|
|
stage1_without_sobel: |
|
|
|
|
Calc magnitudes |
|
|
|
|
Non maxima suppression |
|
|
|
|
Double thresholding |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
struct PtrStepSz |
|
|
|
|
{ |
|
|
|
|
__global uchar * ptr; |
|
|
|
|
int step, rows, cols; |
|
|
|
|
#define loadpix(addr) (__global short *)(addr) |
|
|
|
|
#define storepix(val, addr) *(__global int *)(addr) = (int)(val) |
|
|
|
|
|
|
|
|
|
#ifdef L2GRAD |
|
|
|
|
#define dist(x, y) ((int)(x) * (x) + (int)(y) * (y)) |
|
|
|
|
#else |
|
|
|
|
#define dist(x, y) (abs(x) + abs(y)) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
__constant int prev[4][2] = { |
|
|
|
|
{ 0, -1 }, |
|
|
|
|
{ -1, -1 }, |
|
|
|
|
{ -1, 0 }, |
|
|
|
|
{ -1, 1 } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
inline int get(struct PtrStepSz data, int y, int x) |
|
|
|
|
{ |
|
|
|
|
return *(__global int *)(data.ptr + mad24(data.step, y + 1, (int)sizeof(int) * (x + 1))); |
|
|
|
|
} |
|
|
|
|
__constant int next[4][2] = { |
|
|
|
|
{ 0, 1 }, |
|
|
|
|
{ 1, 1 }, |
|
|
|
|
{ 1, 0 }, |
|
|
|
|
{ 1, -1 } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
inline void set(struct PtrStepSz data, int y, int x, int value) |
|
|
|
|
__kernel void stage1_without_sobel(__global const uchar *dxptr, int dx_step, int dx_offset, |
|
|
|
|
__global const uchar *dyptr, int dy_step, int dy_offset, |
|
|
|
|
__global uchar *map, int map_step, int map_offset, int rows, int cols, |
|
|
|
|
int low_thr, int high_thr) |
|
|
|
|
{ |
|
|
|
|
*(__global int *)(data.ptr + mad24(data.step, y + 1, (int)sizeof(int) * (x + 1))) = value; |
|
|
|
|
} |
|
|
|
|
int start_x = get_group_id(0) * GRP_SIZEX; |
|
|
|
|
int start_y = get_group_id(1) * GRP_SIZEY; |
|
|
|
|
|
|
|
|
|
// perform Hysteresis for pixel whose edge type is 1 |
|
|
|
|
// |
|
|
|
|
// If candidate pixel (edge type is 1) has a neighbour pixel (in 3x3 area) with type 2, it is believed to be part of an edge and |
|
|
|
|
// marked as edge. Each thread will iterate for 16 times to connect local edges. |
|
|
|
|
// Candidate pixel being identified as edge will then be tested if there is nearby potiential edge points. If there is, counter will |
|
|
|
|
// be incremented by 1 and the point location is stored. These potiential candidates will be processed further in next kernel. |
|
|
|
|
// |
|
|
|
|
// map raw edge type results calculated from calcMap. |
|
|
|
|
// stack the potiential edge points found in this kernel call |
|
|
|
|
// counter the number of potiential edge points |
|
|
|
|
int lidx = get_local_id(0); |
|
|
|
|
int lidy = get_local_id(1); |
|
|
|
|
|
|
|
|
|
__kernel void edgesHysteresisLocal(__global uchar * map_ptr, int map_step, int map_offset, |
|
|
|
|
__global ushort2 * st, __global unsigned int * counter, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
struct PtrStepSz map = { map_ptr + map_offset, map_step, rows + 1, cols + 1 }; |
|
|
|
|
|
|
|
|
|
__local int smem[18][18]; |
|
|
|
|
|
|
|
|
|
int2 blockIdx = (int2)(get_group_id(0), get_group_id(1)); |
|
|
|
|
int2 blockDim = (int2)(get_local_size(0), get_local_size(1)); |
|
|
|
|
int2 threadIdx = (int2)(get_local_id(0), get_local_id(1)); |
|
|
|
|
|
|
|
|
|
const int x = blockIdx.x * blockDim.x + threadIdx.x; |
|
|
|
|
const int y = blockIdx.y * blockDim.y + threadIdx.y; |
|
|
|
|
|
|
|
|
|
smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? get(map, y, x) : 0; |
|
|
|
|
if (threadIdx.y == 0) |
|
|
|
|
smem[0][threadIdx.x + 1] = x < map.cols ? get(map, y - 1, x) : 0; |
|
|
|
|
if (threadIdx.y == blockDim.y - 1) |
|
|
|
|
smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? get(map, y + 1, x) : 0; |
|
|
|
|
if (threadIdx.x == 0) |
|
|
|
|
smem[threadIdx.y + 1][0] = y < map.rows ? get(map, y, x - 1) : 0; |
|
|
|
|
if (threadIdx.x == blockDim.x - 1) |
|
|
|
|
smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols && y < map.rows ? get(map, y, x + 1) : 0; |
|
|
|
|
if (threadIdx.x == 0 && threadIdx.y == 0) |
|
|
|
|
smem[0][0] = y > 0 && x > 0 ? get(map, y - 1, x - 1) : 0; |
|
|
|
|
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0) |
|
|
|
|
smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? get(map, y - 1, x + 1) : 0; |
|
|
|
|
if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1) |
|
|
|
|
smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? get(map, y + 1, x - 1) : 0; |
|
|
|
|
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1) |
|
|
|
|
smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? get(map, y + 1, x + 1) : 0; |
|
|
|
|
__local int mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)]; |
|
|
|
|
__local short2 sigma[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)]; |
|
|
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
#pragma unroll |
|
|
|
|
for (int i = lidx + lidy * GRP_SIZEX; i < (GRP_SIZEX + 2) * (GRP_SIZEY + 2); i += GRP_SIZEX * GRP_SIZEY) |
|
|
|
|
{ |
|
|
|
|
int x = clamp(start_x - 1 + i % (GRP_SIZEX + 2), 0, cols - 1); |
|
|
|
|
int y = clamp(start_y - 1 + i / (GRP_SIZEX + 2), 0, rows - 1); |
|
|
|
|
|
|
|
|
|
if (x >= cols || y >= rows) |
|
|
|
|
return; |
|
|
|
|
int dx_index = mad24(y, dx_step, mad24(x, cn * (int)sizeof(short), dx_offset)); |
|
|
|
|
int dy_index = mad24(y, dy_step, mad24(x, cn * (int)sizeof(short), dy_offset)); |
|
|
|
|
|
|
|
|
|
int n; |
|
|
|
|
__global short *dx = loadpix(dxptr + dx_index); |
|
|
|
|
__global short *dy = loadpix(dyptr + dy_index); |
|
|
|
|
|
|
|
|
|
#pragma unroll |
|
|
|
|
for (int k = 0; k < 16; ++k) |
|
|
|
|
int mag0 = dist(dx[0], dy[0]); |
|
|
|
|
#if cn > 1 |
|
|
|
|
short cdx = dx[0], cdy = dy[0]; |
|
|
|
|
#pragma unroll |
|
|
|
|
for (int j = 1; j < cn; ++j) |
|
|
|
|
{ |
|
|
|
|
n = 0; |
|
|
|
|
|
|
|
|
|
if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1) |
|
|
|
|
int mag1 = dist(dx[j], dy[j]); |
|
|
|
|
if (mag1 > mag0) |
|
|
|
|
{ |
|
|
|
|
n += smem[threadIdx.y ][threadIdx.x ] == 2; |
|
|
|
|
n += smem[threadIdx.y ][threadIdx.x + 1] == 2; |
|
|
|
|
n += smem[threadIdx.y ][threadIdx.x + 2] == 2; |
|
|
|
|
mag0 = mag1; |
|
|
|
|
cdx = dx[j]; |
|
|
|
|
cdy = dy[j]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
dx[0] = cdx; |
|
|
|
|
dy[0] = cdy; |
|
|
|
|
#endif |
|
|
|
|
mag[i] = mag0; |
|
|
|
|
sigma[i] = (short2)(dx[0], dy[0]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
n += smem[threadIdx.y + 1][threadIdx.x ] == 2; |
|
|
|
|
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2; |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
n += smem[threadIdx.y + 2][threadIdx.x ] == 2; |
|
|
|
|
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2; |
|
|
|
|
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2; |
|
|
|
|
} |
|
|
|
|
int gidx = get_global_id(0); |
|
|
|
|
int gidy = get_global_id(1); |
|
|
|
|
|
|
|
|
|
if (n > 0) |
|
|
|
|
smem[threadIdx.y + 1][threadIdx.x + 1] = 2; |
|
|
|
|
} |
|
|
|
|
if (gidx >= cols || gidy >= rows) |
|
|
|
|
return; |
|
|
|
|
|
|
|
|
|
lidx++; |
|
|
|
|
lidy++; |
|
|
|
|
|
|
|
|
|
const int e = smem[threadIdx.y + 1][threadIdx.x + 1]; |
|
|
|
|
set(map, y, x, e); |
|
|
|
|
n = 0; |
|
|
|
|
int mag0 = mag[lidx + lidy * (GRP_SIZEX + 2)]; |
|
|
|
|
short x = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).x; |
|
|
|
|
short y = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).y; |
|
|
|
|
|
|
|
|
|
if (e == 2) |
|
|
|
|
int value = 1; |
|
|
|
|
if (mag0 > low_thr) |
|
|
|
|
{ |
|
|
|
|
n += smem[threadIdx.y ][threadIdx.x ] == 1; |
|
|
|
|
n += smem[threadIdx.y ][threadIdx.x + 1] == 1; |
|
|
|
|
n += smem[threadIdx.y ][threadIdx.x + 2] == 1; |
|
|
|
|
int a = (y / (float)x) * TG22; |
|
|
|
|
int b = (y / (float)x) * TG67; |
|
|
|
|
|
|
|
|
|
n += smem[threadIdx.y + 1][threadIdx.x ] == 1; |
|
|
|
|
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1; |
|
|
|
|
a = min((int)abs(a), 1) + 1; |
|
|
|
|
b = min((int)abs(b), 1); |
|
|
|
|
|
|
|
|
|
n += smem[threadIdx.y + 2][threadIdx.x ] == 1; |
|
|
|
|
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1; |
|
|
|
|
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1; |
|
|
|
|
} |
|
|
|
|
int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31); |
|
|
|
|
int dir = a * b + 2 * dir3; |
|
|
|
|
int prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]]; |
|
|
|
|
int next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1); |
|
|
|
|
|
|
|
|
|
if (n > 0) |
|
|
|
|
if (mag0 > prev_mag && mag0 >= next_mag) |
|
|
|
|
{ |
|
|
|
|
const int ind = atomic_inc(counter); |
|
|
|
|
st[ind] = (ushort2)(x + 1, y + 1); |
|
|
|
|
value = (mag0 > high_thr) ? 2 : 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset))); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#elif defined OP_HYST_GLOBAL |
|
|
|
|
#undef TG22 |
|
|
|
|
#undef CANNY_SHIFT |
|
|
|
|
|
|
|
|
|
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; |
|
|
|
|
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; |
|
|
|
|
#elif defined STAGE2 |
|
|
|
|
/* |
|
|
|
|
stage2: |
|
|
|
|
hysteresis (add edges labeled 0 if they are connected with an edge labeled 2) |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
#define loadpix(addr) *(__global int *)(addr) |
|
|
|
|
#define storepix(val, addr) *(__global int *)(addr) = (int)(val) |
|
|
|
|
#define l_stack_size 256 |
|
|
|
|
#define p_stack_size 8 |
|
|
|
|
|
|
|
|
|
#define stack_size 512 |
|
|
|
|
#define map_index mad24(map_step, pos.y, pos.x * (int)sizeof(int)) |
|
|
|
|
__constant short move_dir[2][8] = { |
|
|
|
|
{ -1, -1, -1, 0, 0, 1, 1, 1 }, |
|
|
|
|
{ -1, 0, 1, -1, 1, -1, 0, 1 } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
__kernel void edgesHysteresisGlobal(__global uchar * map, int map_step, int map_offset, |
|
|
|
|
__global ushort2 * st1, __global ushort2 * st2, __global int * counter, |
|
|
|
|
int rows, int cols, int count) |
|
|
|
|
__kernel void stage2_hysteresis(__global uchar *map, int map_step, int map_offset, int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
map += map_offset; |
|
|
|
|
|
|
|
|
|
int lidx = get_local_id(0); |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y0 = get_global_id(1) * PIX_PER_WI; |
|
|
|
|
|
|
|
|
|
int grp_idx = get_group_id(0); |
|
|
|
|
int grp_idy = get_group_id(1); |
|
|
|
|
int lid = get_local_id(0) + get_local_id(1) * 32; |
|
|
|
|
|
|
|
|
|
__local unsigned int s_counter, s_ind; |
|
|
|
|
__local ushort2 s_st[stack_size]; |
|
|
|
|
__local ushort2 l_stack[l_stack_size]; |
|
|
|
|
__local int l_counter; |
|
|
|
|
|
|
|
|
|
if (lidx == 0) |
|
|
|
|
s_counter = 0; |
|
|
|
|
if (lid == 0) |
|
|
|
|
l_counter = 0; |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
int ind = mad24(grp_idy, (int)get_local_size(0), grp_idx); |
|
|
|
|
|
|
|
|
|
if (ind < count) |
|
|
|
|
{ |
|
|
|
|
ushort2 pos = st1[ind]; |
|
|
|
|
if (lidx < 8) |
|
|
|
|
#pragma unroll |
|
|
|
|
for (int y = y0; y < min(y0 + PIX_PER_WI, rows); ++y) |
|
|
|
|
{ |
|
|
|
|
pos.x += c_dx[lidx]; |
|
|
|
|
pos.y += c_dy[lidx]; |
|
|
|
|
if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && *(__global int *)(map + map_index) == 1) |
|
|
|
|
int type = loadpix(map + mad24(y, map_step, x * (int)sizeof(int))); |
|
|
|
|
if (type == 2) |
|
|
|
|
{ |
|
|
|
|
*(__global int *)(map + map_index) = 2; |
|
|
|
|
ind = atomic_inc(&s_counter); |
|
|
|
|
s_st[ind] = pos; |
|
|
|
|
l_stack[atomic_inc(&l_counter)] = (ushort2)(x, y); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
while (s_counter > 0 && s_counter <= stack_size - get_local_size(0)) |
|
|
|
|
{ |
|
|
|
|
const int subTaskIdx = lidx >> 3; |
|
|
|
|
const int portion = min(s_counter, (uint)(get_local_size(0)>> 3)); |
|
|
|
|
ushort2 p_stack[p_stack_size]; |
|
|
|
|
int p_counter = 0; |
|
|
|
|
|
|
|
|
|
if (subTaskIdx < portion) |
|
|
|
|
pos = s_st[s_counter - 1 - subTaskIdx]; |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
while(l_counter != 0) |
|
|
|
|
{ |
|
|
|
|
int mod = l_counter % 64; |
|
|
|
|
int pix_per_thr = l_counter / 64 + (lid < mod) ? 1 : 0; |
|
|
|
|
|
|
|
|
|
if (lidx == 0) |
|
|
|
|
s_counter -= portion; |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
#pragma unroll |
|
|
|
|
for (int i = 0; i < pix_per_thr; ++i) |
|
|
|
|
{ |
|
|
|
|
ushort2 pos = l_stack[ atomic_dec(&l_counter) - 1 ]; |
|
|
|
|
|
|
|
|
|
if (subTaskIdx < portion) |
|
|
|
|
#pragma unroll |
|
|
|
|
for (int j = 0; j < 8; ++j) |
|
|
|
|
{ |
|
|
|
|
pos.x += c_dx[lidx & 7]; |
|
|
|
|
pos.y += c_dy[lidx & 7]; |
|
|
|
|
if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows && *(__global int *)(map + map_index) == 1) |
|
|
|
|
ushort posx = pos.x + move_dir[0][j]; |
|
|
|
|
ushort posy = pos.y + move_dir[1][j]; |
|
|
|
|
if (posx < 0 || posy < 0 || posx >= cols || posy >= rows) |
|
|
|
|
continue; |
|
|
|
|
__global uchar *addr = map + mad24(posy, map_step, posx * (int)sizeof(int)); |
|
|
|
|
int type = loadpix(addr); |
|
|
|
|
if (type == 0) |
|
|
|
|
{ |
|
|
|
|
*(__global int *)(map + map_index) = 2; |
|
|
|
|
ind = atomic_inc(&s_counter); |
|
|
|
|
s_st[ind] = pos; |
|
|
|
|
p_stack[p_counter++] = (ushort2)(posx, posy); |
|
|
|
|
storepix(2, addr); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
} |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
if (s_counter > 0) |
|
|
|
|
while (p_counter > 0) |
|
|
|
|
{ |
|
|
|
|
if (lidx == 0) |
|
|
|
|
{ |
|
|
|
|
ind = atomic_add(counter, s_counter); |
|
|
|
|
s_ind = ind - s_counter; |
|
|
|
|
l_stack[ atomic_inc(&l_counter) ] = p_stack[--p_counter]; |
|
|
|
|
} |
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
ind = s_ind; |
|
|
|
|
for (int i = lidx; i < (int)s_counter; i += get_local_size(0)) |
|
|
|
|
st2[ind + i] = s_st[i]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#undef map_index |
|
|
|
|
#undef stack_size |
|
|
|
|
|
|
|
|
|
#elif defined OP_EDGES |
|
|
|
|
#elif defined GET_EDGES |
|
|
|
|
|
|
|
|
|
// Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0. |
|
|
|
|
// map edge type mappings |
|
|
|
|
// dst edge output |
|
|
|
|
|
|
|
|
|
__kernel void getEdges(__global const uchar * mapptr, int map_step, int map_offset, |
|
|
|
|
__global uchar * dst, int dst_step, int dst_offset, int rows, int cols) |
|
|
|
|
__kernel void getEdges(__global const uchar *mapptr, int map_step, int map_offset, int rows, int cols, |
|
|
|
|
__global uchar *dst, int dst_step, int dst_offset) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y0 = get_global_id(1) * PIX_PER_WI; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
#pragma unroll |
|
|
|
|
for (int y = y0; y < min(y0 + PIX_PER_WI, rows); ++y) |
|
|
|
|
{ |
|
|
|
|
int map_index = mad24(map_step, y + 1, mad24(x + 1, (int)sizeof(int), map_offset)); |
|
|
|
|
int dst_index = mad24(dst_step, y, x + dst_offset); |
|
|
|
|
int map_index = mad24(map_step, y, mad24(x, (int)sizeof(int), map_offset)); |
|
|
|
|
int dst_index = mad24(dst_step, y, x) + dst_offset; |
|
|
|
|
|
|
|
|
|
__global const int * map = (__global const int *)(mapptr + map_index); |
|
|
|
|
|
|
|
|
|
dst[dst_index] = (uchar)(-(map[0] >> 1)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|