mirror of https://github.com/opencv/opencv.git
1. HaarDetetctor: repack nodes to reduce memory footprint 2. cornerMinEigVal: 4 ocl kernels are fused into 1 for sobel calculationpull/1675/head
parent
6dda2652cd
commit
2f62940a0e
6 changed files with 443 additions and 11 deletions
@ -0,0 +1,108 @@ |
||||
/////////////////////////////////////////////////////////////////////////////////////////////////// |
||||
/////////////////////////////////Macro for border type//////////////////////////////////////////// |
||||
///////////////////////////////////////////////////////////////////////////////////////////////// |
||||
#ifdef BORDER_REPLICATE |
||||
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh |
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i)) |
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr)) |
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i)) |
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr)) |
||||
#endif |
||||
|
||||
#ifdef BORDER_REFLECT |
||||
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb |
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i)) |
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)) |
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i)) |
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr)) |
||||
#endif |
||||
|
||||
#ifdef BORDER_REFLECT101 |
||||
//BORDER_REFLECT101: gfedcb|abcdefgh|gfedcba |
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i)) |
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr)) |
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i)) |
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr)) |
||||
#endif |
||||
|
||||
#ifdef BORDER_WRAP |
||||
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg |
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i)) |
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr)) |
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i)) |
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr)) |
||||
#endif |
||||
|
||||
__kernel void sobel3( |
||||
__global uchar* Src, |
||||
__global float* DstX, |
||||
__global float* DstY, |
||||
int width, int height, |
||||
uint srcStride, uint dstStride, |
||||
float scale |
||||
) |
||||
{ |
||||
__local float lsmem[BLK_Y+2][BLK_X+2]; |
||||
|
||||
int lix = get_local_id(0); |
||||
int liy = get_local_id(1); |
||||
|
||||
int gix = get_group_id(0); |
||||
int giy = get_group_id(1); |
||||
|
||||
int id_x = get_global_id(0); |
||||
int id_y = get_global_id(1); |
||||
|
||||
lsmem[liy+1][lix+1] = convert_float(Src[ id_y * srcStride + id_x ]); |
||||
|
||||
int id_y_h = ADDR_H(id_y-1, 0); |
||||
int id_y_b = ADDR_B(id_y+1, height); |
||||
|
||||
int id_x_l = ADDR_L(id_x-1, 0); |
||||
int id_x_r = ADDR_R(id_x+1, width); |
||||
|
||||
if(liy==0) |
||||
{ |
||||
lsmem[0][lix+1]=convert_float(Src[ id_y_h * srcStride + id_x ]); |
||||
|
||||
if(lix==0) |
||||
lsmem[0][0]=convert_float(Src[ id_y_h * srcStride + id_x_l ]); |
||||
else if(lix==BLK_X-1) |
||||
lsmem[0][BLK_X+1]=convert_float(Src[ id_y_h * srcStride + id_x_r ]); |
||||
} |
||||
else if(liy==BLK_Y-1) |
||||
{ |
||||
lsmem[BLK_Y+1][lix+1]=convert_float(Src[ id_y_b * srcStride + id_x ]); |
||||
|
||||
if(lix==0) |
||||
lsmem[BLK_Y+1][0]=convert_float(Src[ id_y_b * srcStride + id_x_l ]); |
||||
else if(lix==BLK_X-1) |
||||
lsmem[BLK_Y+1][BLK_X+1]=convert_float(Src[ id_y_b * srcStride + id_x_r ]); |
||||
} |
||||
|
||||
if(lix==0) |
||||
lsmem[liy+1][0] = convert_float(Src[ id_y * srcStride + id_x_l ]); |
||||
else if(lix==BLK_X-1) |
||||
lsmem[liy+1][BLK_X+1] = convert_float(Src[ id_y * srcStride + id_x_r ]); |
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
float u1 = lsmem[liy][lix]; |
||||
float u2 = lsmem[liy][lix+1]; |
||||
float u3 = lsmem[liy][lix+2]; |
||||
|
||||
float m1 = lsmem[liy+1][lix]; |
||||
float m2 = lsmem[liy+1][lix+1]; |
||||
float m3 = lsmem[liy+1][lix+2]; |
||||
|
||||
float b1 = lsmem[liy+2][lix]; |
||||
float b2 = lsmem[liy+2][lix+1]; |
||||
float b3 = lsmem[liy+2][lix+2]; |
||||
|
||||
//m2 * scale;// |
||||
float dx = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1 ); |
||||
DstX[ id_y * dstStride + id_x ] = dx * scale; |
||||
|
||||
float dy = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3); |
||||
DstY[ id_y * dstStride + id_x ] = dy * scale; |
||||
} |
Loading…
Reference in new issue