|
|
|
@ -47,8 +47,12 @@ |
|
|
|
|
//warpPerspective kernel |
|
|
|
|
//Supported data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4; supported interpolation methods: NN, Linear, Cubic. |
|
|
|
|
|
|
|
|
|
#if defined DOUBLE_SUPPORT |
|
|
|
|
#if defined (DOUBLE_SUPPORT) |
|
|
|
|
#ifdef cl_khr_fp64 |
|
|
|
|
#pragma OPENCL EXTENSION cl_khr_fp64:enable |
|
|
|
|
#elif defined (cl_amd_fp64) |
|
|
|
|
#pragma OPENCL EXTENSION cl_amd_fp64:enable |
|
|
|
|
#endif |
|
|
|
|
typedef double F; |
|
|
|
|
typedef double4 F4; |
|
|
|
|
#define convert_F4 convert_double4 |
|
|
|
@ -81,8 +85,8 @@ inline void interpolateCubic( float x, float* coeffs ) |
|
|
|
|
/**********************************************8UC1********************************************* |
|
|
|
|
***********************************************************************************************/ |
|
|
|
|
__kernel void warpPerspectiveNN_C1_D0(__global uchar const * restrict src, __global uchar * dst, int src_cols, int src_rows, |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -112,14 +116,14 @@ __kernel void warpPerspectiveNN_C1_D0(__global uchar const * restrict src, __glo |
|
|
|
|
sval.s1 = scon.s1 ? src[spos.s1] : 0; |
|
|
|
|
sval.s2 = scon.s2 ? src[spos.s2] : 0; |
|
|
|
|
sval.s3 = scon.s3 ? src[spos.s3] : 0; |
|
|
|
|
dval = convert_uchar4(dcon != 0) ? sval : dval; |
|
|
|
|
dval = convert_uchar4(dcon) != (uchar4)(0,0,0,0) ? sval : dval; |
|
|
|
|
*d = dval; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, __global uchar * dst, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -142,7 +146,7 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _ |
|
|
|
|
int i; |
|
|
|
|
#pragma unroll 4 |
|
|
|
|
for(i=0; i<4; i++) |
|
|
|
|
v[i] = (sx+(i&1) >= 0 && sx+(i&1) < src_cols && sy+(i>>1) >= 0 && sy+(i>>1) < src_rows) ? src[src_offset + (sy+(i>>1)) * srcStep + (sx+(i&1))] : 0; |
|
|
|
|
v[i] = (sx+(i&1) >= 0 && sx+(i&1) < src_cols && sy+(i>>1) >= 0 && sy+(i>>1) < src_rows) ? src[src_offset + (sy+(i>>1)) * srcStep + (sx+(i&1))] : (uchar)0; |
|
|
|
|
|
|
|
|
|
short itab[4]; |
|
|
|
|
float tab1y[2], tab1x[2]; |
|
|
|
@ -170,8 +174,8 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar * dst, int src_cols, int src_rows, |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -190,15 +194,15 @@ __kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar * |
|
|
|
|
short ay = (short)(Y & (INTER_TAB_SIZE-1)); |
|
|
|
|
short ax = (short)(X & (INTER_TAB_SIZE-1)); |
|
|
|
|
|
|
|
|
|
uchar v[16]; |
|
|
|
|
uchar v[16]; |
|
|
|
|
int i, j; |
|
|
|
|
|
|
|
|
|
#pragma unroll 4 |
|
|
|
|
for(i=0; i<4; i++) |
|
|
|
|
for(j=0; j<4; j++) |
|
|
|
|
{ |
|
|
|
|
v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : 0; |
|
|
|
|
} |
|
|
|
|
for(j=0; j<4; j++) |
|
|
|
|
{ |
|
|
|
|
v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? src[src_offset+(sy+i) * srcStep + (sx+j)] : (uchar)0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
short itab[16]; |
|
|
|
|
float tab1y[4], tab1x[4]; |
|
|
|
@ -227,7 +231,7 @@ __kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar * |
|
|
|
|
if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] ) |
|
|
|
|
mk1 = k1, mk2 = k2; |
|
|
|
|
else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] ) |
|
|
|
|
Mk1 = k1, Mk2 = k2; |
|
|
|
|
Mk1 = k1, Mk2 = k2; |
|
|
|
|
} |
|
|
|
|
diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff)); |
|
|
|
|
} |
|
|
|
@ -249,8 +253,8 @@ __kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar * |
|
|
|
|
***********************************************************************************************/ |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveNN_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -273,8 +277,8 @@ __kernel void warpPerspectiveNN_C4_D0(__global uchar4 const * restrict src, __gl |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -299,10 +303,10 @@ __kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src, |
|
|
|
|
|
|
|
|
|
int4 v0, v1, v2, v3; |
|
|
|
|
|
|
|
|
|
v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx]) : 0; |
|
|
|
|
v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx+1]) : 0; |
|
|
|
|
v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx]) : 0; |
|
|
|
|
v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx+1]) : 0; |
|
|
|
|
v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx]) : (int4)0; |
|
|
|
|
v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? convert_int4(src[src_offset+sy * srcStep + sx+1]) : (int4)0; |
|
|
|
|
v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx]) : (int4)0; |
|
|
|
|
v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? convert_int4(src[src_offset+(sy+1) * srcStep + sx+1]) : (int4)0; |
|
|
|
|
|
|
|
|
|
int itab0, itab1, itab2, itab3; |
|
|
|
|
float taby, tabx; |
|
|
|
@ -323,8 +327,8 @@ __kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, __global uchar4 * dst, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -352,10 +356,10 @@ __kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, _ |
|
|
|
|
int i,j; |
|
|
|
|
#pragma unroll 4 |
|
|
|
|
for(i=0; i<4; i++) |
|
|
|
|
for(j=0; j<4; j++) |
|
|
|
|
{ |
|
|
|
|
v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)]) : (uchar4)0; |
|
|
|
|
} |
|
|
|
|
for(j=0; j<4; j++) |
|
|
|
|
{ |
|
|
|
|
v[i*4+j] = (sx+j >= 0 && sx+j < src_cols && sy+i >= 0 && sy+i < src_rows) ? (src[src_offset+(sy+i) * srcStep + (sx+j)]) : (uchar4)0; |
|
|
|
|
} |
|
|
|
|
int itab[16]; |
|
|
|
|
float tab1y[4], tab1x[4]; |
|
|
|
|
float axx, ayy; |
|
|
|
@ -381,14 +385,14 @@ __kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, _ |
|
|
|
|
int diff = isum - INTER_REMAP_COEF_SCALE; |
|
|
|
|
int Mk1=2, Mk2=2, mk1=2, mk2=2; |
|
|
|
|
|
|
|
|
|
for( k1 = 2; k1 < 4; k1++ ) |
|
|
|
|
for( k1 = 2; k1 < 4; k1++ ) |
|
|
|
|
for( k2 = 2; k2 < 4; k2++ ) |
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
if( itab[(k1<<2)+k2] < itab[(mk1<<2)+mk2] ) |
|
|
|
|
mk1 = k1, mk2 = k2; |
|
|
|
|
else if( itab[(k1<<2)+k2] > itab[(Mk1<<2)+Mk2] ) |
|
|
|
|
Mk1 = k1, Mk2 = k2; |
|
|
|
|
Mk1 = k1, Mk2 = k2; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
diff<0 ? (itab[(Mk1<<2)+Mk2]=(short)(itab[(Mk1<<2)+Mk2]-diff)) : (itab[(mk1<<2)+mk2]=(short)(itab[(mk1<<2)+mk2]-diff)); |
|
|
|
@ -411,8 +415,8 @@ __kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, _ |
|
|
|
|
***********************************************************************************************/ |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveNN_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows, |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -434,8 +438,8 @@ __kernel void warpPerspectiveNN_C1_D5(__global float * src, __global float * dst |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows, |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -458,10 +462,10 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float * |
|
|
|
|
|
|
|
|
|
float v0, v1, v2, v3; |
|
|
|
|
|
|
|
|
|
v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx] : 0; |
|
|
|
|
v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx+1] : 0; |
|
|
|
|
v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx] : 0; |
|
|
|
|
v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx+1] : 0; |
|
|
|
|
v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx] : (float)0; |
|
|
|
|
v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? src[src_offset+sy * srcStep + sx+1] : (float)0; |
|
|
|
|
v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx] : (float)0; |
|
|
|
|
v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? src[src_offset+(sy+1) * srcStep + sx+1] : (float)0; |
|
|
|
|
|
|
|
|
|
float tab[4]; |
|
|
|
|
float taby[2], tabx[2]; |
|
|
|
@ -483,8 +487,8 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float * |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float * dst, int src_cols, int src_rows, |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -510,7 +514,7 @@ __kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float * |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
for(i=0; i<16; i++) |
|
|
|
|
v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : 0; |
|
|
|
|
v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float)0; |
|
|
|
|
|
|
|
|
|
float tab[16]; |
|
|
|
|
float tab1y[4], tab1x[4]; |
|
|
|
@ -546,8 +550,8 @@ __kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float * |
|
|
|
|
***********************************************************************************************/ |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveNN_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows, |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -564,13 +568,13 @@ __kernel void warpPerspectiveNN_C4_D5(__global float4 * src, __global float4 * d |
|
|
|
|
short sy = (short)Y; |
|
|
|
|
|
|
|
|
|
if(dx >= 0 && dx < dst_cols && dy >= 0 && dy < dst_rows) |
|
|
|
|
dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx>=0 && sx<src_cols && sy>=0 && sy<src_rows) ? src[(src_offset>>4)+sy*(srcStep>>2)+sx] : 0; |
|
|
|
|
dst[(dst_offset>>4)+dy*(dstStep>>2)+dx]= (sx>=0 && sx<src_cols && sy>=0 && sy<src_rows) ? src[(src_offset>>4)+sy*(srcStep>>2)+sx] : (float)0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4 * dst, int src_cols, int src_rows, |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int dst_cols, int dst_rows, int srcStep, int dstStep, |
|
|
|
|
int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -597,10 +601,10 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4 |
|
|
|
|
|
|
|
|
|
float4 v0, v1, v2, v3; |
|
|
|
|
|
|
|
|
|
v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : 0; |
|
|
|
|
v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : 0; |
|
|
|
|
v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : 0; |
|
|
|
|
v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : 0; |
|
|
|
|
v0 = (sx0 >= 0 && sx0 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0] : (float4)0; |
|
|
|
|
v1 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0 >= 0 && sy0 < src_rows) ? src[src_offset+sy0 * srcStep + sx0+1] : (float4)0; |
|
|
|
|
v2 = (sx0 >= 0 && sx0 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0] : (float4)0; |
|
|
|
|
v3 = (sx0+1 >= 0 && sx0+1 < src_cols && sy0+1 >= 0 && sy0+1 < src_rows) ? src[src_offset+(sy0+1) * srcStep + sx0+1] : (float4)0; |
|
|
|
|
|
|
|
|
|
float tab[4]; |
|
|
|
|
float taby[2], tabx[2]; |
|
|
|
@ -622,8 +626,8 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4 |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4 * dst, |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
int src_cols, int src_rows, int dst_cols, int dst_rows, int srcStep, |
|
|
|
|
int dstStep, int src_offset, int dst_offset, __constant F * M, int threadCols ) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
@ -652,7 +656,7 @@ __kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4 |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
for(i=0; i<16; i++) |
|
|
|
|
v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : 0; |
|
|
|
|
v[i] = (sx+(i&3) >= 0 && sx+(i&3) < src_cols && sy+(i>>2) >= 0 && sy+(i>>2) < src_rows) ? src[src_offset+(sy+(i>>2)) * srcStep + (sx+(i&3))] : (float4)0; |
|
|
|
|
|
|
|
|
|
float tab[16]; |
|
|
|
|
float tab1y[4], tab1x[4]; |
|
|
|
@ -680,5 +684,6 @@ __kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4 |
|
|
|
|
dst[dst_offset+dy*dstStep+dx] = sum; |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|