|
|
|
@ -45,7 +45,7 @@ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// resize kernel |
|
|
|
|
// Currently, CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported. |
|
|
|
|
// Currently, CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported. |
|
|
|
|
// We shall support other types later if necessary. |
|
|
|
|
|
|
|
|
|
#ifdef DOUBLE_SUPPORT |
|
|
|
@ -54,20 +54,18 @@ |
|
|
|
|
#elif defined (cl_khr_fp64) |
|
|
|
|
#pragma OPENCL EXTENSION cl_khr_fp64:enable |
|
|
|
|
#endif |
|
|
|
|
#define F double |
|
|
|
|
#else |
|
|
|
|
#define F float |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define INTER_RESIZE_COEF_BITS 11 |
|
|
|
|
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS) |
|
|
|
|
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1) |
|
|
|
|
#define CAST_SCALE (1.0f/(1<<CAST_BITS)) |
|
|
|
|
// INC(x, l): x + 1 when that is still below the limit l, otherwise x itself.
// Every use of each argument is parenthesized so that expression arguments
// (e.g. `a & b`) expand with the intended precedence. Note that x is evaluated
// more than once, so do not pass arguments with side effects.
#define INC(x,l) (((x)+1) >= (l) ? (x) : ((x)+1))
|
|
|
|
|
|
|
|
|
#ifdef LN |
|
|
|
|
|
|
|
|
|
__kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restrict src, |
|
|
|
|
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, |
|
|
|
|
int dst_offset, int src_offset,int dst_step, int src_step, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) |
|
|
|
|
{ |
|
|
|
|
int gx = get_global_id(0); |
|
|
|
@ -75,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri |
|
|
|
|
|
|
|
|
|
float4 sx, u, xf; |
|
|
|
|
int4 x, DX; |
|
|
|
|
gx = (gx<<2) - (dstoffset_in_pixel&3); |
|
|
|
|
gx = (gx<<2) - (dst_offset&3); |
|
|
|
|
DX = (int4)(gx, gx+1, gx+2, gx+3); |
|
|
|
|
sx = (convert_float4(DX) + 0.5f) * ifx - 0.5f; |
|
|
|
|
xf = floor(sx); |
|
|
|
@ -113,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri |
|
|
|
|
int4 val1, val2, val; |
|
|
|
|
int4 sdata1, sdata2, sdata3, sdata4; |
|
|
|
|
|
|
|
|
|
int4 pos1 = mad24((int4)y, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel); |
|
|
|
|
int4 pos2 = mad24((int4)y, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel); |
|
|
|
|
int4 pos3 = mad24((int4)y_, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel); |
|
|
|
|
int4 pos4 = mad24((int4)y_, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel); |
|
|
|
|
int4 pos1 = mad24((int4)y, (int4)src_step, x+(int4)src_offset); |
|
|
|
|
int4 pos2 = mad24((int4)y, (int4)src_step, x_+(int4)src_offset); |
|
|
|
|
int4 pos3 = mad24((int4)y_, (int4)src_step, x+(int4)src_offset); |
|
|
|
|
int4 pos4 = mad24((int4)y_, (int4)src_step, x_+(int4)src_offset); |
|
|
|
|
|
|
|
|
|
sdata1.s0 = src[pos1.s0]; |
|
|
|
|
sdata1.s1 = src[pos1.s1]; |
|
|
|
@ -144,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri |
|
|
|
|
|
|
|
|
|
val = ((val + (1<<(CAST_BITS-1))) >> CAST_BITS); |
|
|
|
|
|
|
|
|
|
pos4 = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel); |
|
|
|
|
pos4 = mad24(dy, dst_step, gx+dst_offset); |
|
|
|
|
pos4.y++; |
|
|
|
|
pos4.z+=2; |
|
|
|
|
pos4.w+=3; |
|
|
|
|
uchar4 uval = convert_uchar4_sat(val); |
|
|
|
|
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0); |
|
|
|
|
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dst_offset&3)==0); |
|
|
|
|
if(con) |
|
|
|
|
{ |
|
|
|
|
*(__global uchar4*)(dst + pos4.x)=uval; |
|
|
|
@ -176,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src, |
|
|
|
|
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, |
|
|
|
|
int dst_offset, int src_offset,int dst_step, int src_step, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
@ -202,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src, |
|
|
|
|
int y_ = INC(y,src_rows); |
|
|
|
|
int x_ = INC(x,src_cols); |
|
|
|
|
int4 srcpos; |
|
|
|
|
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel); |
|
|
|
|
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel); |
|
|
|
|
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel); |
|
|
|
|
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel); |
|
|
|
|
srcpos.x = mad24(y, src_step, x+src_offset); |
|
|
|
|
srcpos.y = mad24(y, src_step, x_+src_offset); |
|
|
|
|
srcpos.z = mad24(y_, src_step, x+src_offset); |
|
|
|
|
srcpos.w = mad24(y_, src_step, x_+src_offset); |
|
|
|
|
int4 data0 = convert_int4(src[srcpos.x]); |
|
|
|
|
int4 data1 = convert_int4(src[srcpos.y]); |
|
|
|
|
int4 data2 = convert_int4(src[srcpos.z]); |
|
|
|
|
int4 data3 = convert_int4(src[srcpos.w]); |
|
|
|
|
int4 val = mul24((int4)mul24(U1, V1) , data0) + mul24((int4)mul24(U, V1) , data1) |
|
|
|
|
+mul24((int4)mul24(U1, V) , data2)+mul24((int4)mul24(U, V) , data3); |
|
|
|
|
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel); |
|
|
|
|
int dstpos = mad24(dy, dst_step, dx+dst_offset); |
|
|
|
|
uchar4 uval = convert_uchar4((val + (1<<(CAST_BITS-1)))>>CAST_BITS); |
|
|
|
|
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows) |
|
|
|
|
dst[dstpos] = uval; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void resizeLN_C1_D5(__global float * dst, __global float * src, |
|
|
|
|
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, |
|
|
|
|
int dst_offset, int src_offset,int dst_step, int src_step, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
@ -239,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src, |
|
|
|
|
float u1 = 1.f-u; |
|
|
|
|
float v1 = 1.f-v; |
|
|
|
|
int4 srcpos; |
|
|
|
|
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel); |
|
|
|
|
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel); |
|
|
|
|
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel); |
|
|
|
|
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel); |
|
|
|
|
srcpos.x = mad24(y, src_step, x+src_offset); |
|
|
|
|
srcpos.y = mad24(y, src_step, x_+src_offset); |
|
|
|
|
srcpos.z = mad24(y_, src_step, x+src_offset); |
|
|
|
|
srcpos.w = mad24(y_, src_step, x_+src_offset); |
|
|
|
|
float data0 = src[srcpos.x]; |
|
|
|
|
float data1 = src[srcpos.y]; |
|
|
|
|
float data2 = src[srcpos.z]; |
|
|
|
@ -252,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src, |
|
|
|
|
float val2 = u1 * data2 + |
|
|
|
|
u * data3; |
|
|
|
|
float val = v1 * val1 + v * val2; |
|
|
|
|
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel); |
|
|
|
|
int dstpos = mad24(dy, dst_step, dx+dst_offset); |
|
|
|
|
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows) |
|
|
|
|
dst[dstpos] = val; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src, |
|
|
|
|
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, |
|
|
|
|
int dst_offset, int src_offset,int dst_step, int src_step, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
@ -278,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src, |
|
|
|
|
float u1 = 1.f-u; |
|
|
|
|
float v1 = 1.f-v; |
|
|
|
|
int4 srcpos; |
|
|
|
|
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel); |
|
|
|
|
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel); |
|
|
|
|
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel); |
|
|
|
|
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel); |
|
|
|
|
srcpos.x = mad24(y, src_step, x+src_offset); |
|
|
|
|
srcpos.y = mad24(y, src_step, x_+src_offset); |
|
|
|
|
srcpos.z = mad24(y_, src_step, x+src_offset); |
|
|
|
|
srcpos.w = mad24(y_, src_step, x_+src_offset); |
|
|
|
|
float4 s_data1, s_data2, s_data3, s_data4; |
|
|
|
|
s_data1 = src[srcpos.x]; |
|
|
|
|
s_data2 = src[srcpos.y]; |
|
|
|
@ -289,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src, |
|
|
|
|
s_data4 = src[srcpos.w]; |
|
|
|
|
float4 val = u1 * v1 * s_data1 + u * v1 * s_data2 |
|
|
|
|
+u1 * v *s_data3 + u * v *s_data4; |
|
|
|
|
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel); |
|
|
|
|
int dstpos = mad24(dy, dst_step, dx+dst_offset); |
|
|
|
|
|
|
|
|
|
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows) |
|
|
|
|
dst[dstpos] = val; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Nearest-neighbour resize for single-channel 8-bit data.
//
// Each work-item handles four horizontally adjacent dst elements of row
// get_global_id(1). The first column is 4*get_global_id(0) minus
// (dstoffset_in_pixel & 3), so it can be negative (-3..-1) for the first
// work-item of a row when the dst offset is not a multiple of four.
//
// dst / src             : destination / source buffers, indexed per element
// *offset_in_pixel      : element offset added to every dst / src index
// *step_in_pixel        : element distance between consecutive rows
// src_cols, src_rows    : source size, used to clamp the sampled coordinates
// dst_cols, dst_rows    : destination size, used to guard the stores
// ifx, ify              : factors mapping a dst coordinate to a src coordinate
//                         (type F is float or double, selected earlier in the file)
__kernel void resizeNN_C1_D0(__global uchar * dst, __global uchar * src,
                             int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
                             int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
{
    int gx = get_global_id(0);
    int dy = get_global_id(1);

    // First dst column of this work-item's group of four.
    gx = (gx<<2) - (dstoffset_in_pixel&3);

    F ss1 = gx*ifx;
    F ss2 = (gx+1)*ifx;
    F ss3 = (gx+2)*ifx;
    F ss4 = (gx+3)*ifx;
    F s5 = dy * ify;

    // Source coordinates: floor of the scaled dst coordinate, limited to the
    // last valid column/row. The columns are additionally limited to >= 0:
    // gx may be negative (see above), and the loads below are unconditional,
    // so an unclamped negative column could read in front of the source data.
    // Lanes with a negative dst column are never stored, so this clamp does
    // not change any written value.
    int4 sx;
    int sy;
    sx.s0 = max(min((int)floor(ss1), src_cols-1), 0);
    sx.s1 = max(min((int)floor(ss2), src_cols-1), 0);
    sx.s2 = max(min((int)floor(ss3), src_cols-1), 0);
    sx.s3 = max(min((int)floor(ss4), src_cols-1), 0);
    sy = min((int)floor(s5), src_rows-1);

    // Gather the four source samples.
    uchar4 val;
    int4 pos = mad24((int4)sy, (int4)srcstep_in_pixel, sx+(int4)srcoffset_in_pixel);
    val.s0 = src[pos.s0];
    val.s1 = src[pos.s1];
    val.s2 = src[pos.s2];
    val.s3 = src[pos.s3];

    // Destination indices of the four outputs (scalar base, then +0..+3).
    pos = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel);
    pos.y++;
    pos.z+=2;
    pos.w+=3;

    // Fast path: all four outputs are inside the image and the dst offset is a
    // multiple of four, so the group is written with one uchar4 store.
    int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0);
    if(con)
    {
        *(__global uchar4*)(dst + pos.x)=val;
    }
    else
    {
        // Slow path: store each output separately, skipping those outside dst.
        if(gx >= 0 && gx < dst_cols && dy >= 0 && dy < dst_rows)
        {
            dst[pos.x]=val.x;
        }
        if(gx+1 >= 0 && gx+1 < dst_cols && dy >= 0 && dy < dst_rows)
        {
            dst[pos.y]=val.y;
        }
        if(gx+2 >= 0 && gx+2 < dst_cols && dy >= 0 && dy < dst_rows)
        {
            dst[pos.z]=val.z;
        }
        if(gx+3 >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows)
        {
            dst[pos.w]=val.w;
        }
    }
}
|
|
|
|
#elif defined NN |
|
|
|
|
|
|
|
|
|
__kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src, |
|
|
|
|
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify ) |
|
|
|
|
__kernel void resizeNN(__global T * dst, __global T * src, |
|
|
|
|
int dst_offset, int src_offset,int dst_step, int src_step, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
|
|
|
|
|
|
F s1 = dx*ifx; |
|
|
|
|
F s2 = dy*ify; |
|
|
|
|
int sx = fmin((float)floor(s1), (float)src_cols-1); |
|
|
|
|
int sy = fmin((float)floor(s2), (float)src_rows-1); |
|
|
|
|
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel); |
|
|
|
|
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel); |
|
|
|
|
|
|
|
|
|
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows) |
|
|
|
|
dst[dpos] = src[spos]; |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void resizeNN_C1_D5(__global float * dst, __global float * src, |
|
|
|
|
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify ) |
|
|
|
|
if (dx < dst_cols && dy < dst_rows) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
|
float s1 = dx * ifx, s2 = dy * ify; |
|
|
|
|
int sx = min(convert_int_sat_rtn(s1), src_cols - 1); |
|
|
|
|
int sy = min(convert_int_sat_rtn(s2), src_rows - 1); |
|
|
|
|
|
|
|
|
|
F s1 = dx*ifx; |
|
|
|
|
F s2 = dy*ify; |
|
|
|
|
int sx = fmin((float)floor(s1), (float)src_cols-1); |
|
|
|
|
int sy = fmin((float)floor(s2), (float)src_rows-1); |
|
|
|
|
|
|
|
|
|
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel); |
|
|
|
|
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel); |
|
|
|
|
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows) |
|
|
|
|
dst[dpos] = src[spos]; |
|
|
|
|
int dst_index = mad24(dy, dst_step, dx + dst_offset); |
|
|
|
|
int src_index = mad24(sy, src_step, sx + src_offset); |
|
|
|
|
|
|
|
|
|
dst[dst_index] = src[src_index]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Nearest-neighbour resize for four-channel float data: one work-item copies
// one float4 element from src to dst.
//
// The dst coordinate (get_global_id(0), get_global_id(1)) is scaled by
// (ifx, ify), floored, and limited to the last source column/row to pick the
// source element. Offsets and steps are applied as float4-element indices.
// Work-items outside the dst_cols x dst_rows area do nothing.
__kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
                             int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
                             int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Nothing to write for work-items that fall outside the destination.
    if (col < 0 || col >= dst_cols || row < 0 || row >= dst_rows)
    {
        return;
    }

    // Map the destination coordinate into the source image.
    const F scaled_x = col*ifx;
    const F scaled_y = row*ify;
    const int src_col = min((int)floor(scaled_x), src_cols-1);
    const int src_row = min((int)floor(scaled_y), src_rows-1);

    const int dst_index = mad24(row, dststep_in_pixel, col + dstoffset_in_pixel);
    const int src_index = mad24(src_row, srcstep_in_pixel, src_col + srcoffset_in_pixel);

    dst[dst_index] = src[src_index];
}
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|