|
|
|
@ -99,56 +99,68 @@ enum |
|
|
|
|
#define hrange 0 |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if bidx == 0 |
|
|
|
|
#define R_COMP z |
|
|
|
|
#define G_COMP y |
|
|
|
|
#define B_COMP x |
|
|
|
|
#elif bidx == 2 |
|
|
|
|
#define R_COMP x |
|
|
|
|
#define G_COMP y |
|
|
|
|
#define B_COMP z |
|
|
|
|
#elif bidx == 3 |
|
|
|
|
// The only kernel that uses bidx == 3 doesn't use these macros. |
|
|
|
|
// But we still need to make the compiler happy. |
|
|
|
|
#define R_COMP w |
|
|
|
|
#define G_COMP w |
|
|
|
|
#define B_COMP w |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#define __CAT(x, y) x##y |
|
|
|
|
#define CAT(x, y) __CAT(x, y) |
|
|
|
|
|
|
|
|
|
#define DATA_TYPE_4 CAT(DATA_TYPE, 4) |
|
|
|
|
|
|
|
|
|
///////////////////////////////////// RGB <-> GRAY ////////////////////////////////////// |
|
|
|
|
|
|
|
|
|
__kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset, |
|
|
|
|
__global uchar* dstptr, int dststep, int dstoffset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
#if 1 |
|
|
|
|
const int x = get_global_id(0); |
|
|
|
|
const int y = get_global_id(1); |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); |
|
|
|
|
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, src); |
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
dst[0] = src[bidx] * 0.114f + src[1] * 0.587f + src[(bidx^2)] * 0.299f; |
|
|
|
|
dst[0] = src_pix.B_COMP * 0.114f + src_pix.G_COMP * 0.587f + src_pix.R_COMP * 0.299f; |
|
|
|
|
#else |
|
|
|
|
dst[0] = (DATA_TYPE)CV_DESCALE((src[bidx] * B2Y + src[1] * G2Y + src[(bidx^2)] * R2Y), yuv_shift); |
|
|
|
|
dst[0] = (DATA_TYPE)CV_DESCALE((src_pix.B_COMP * B2Y + src_pix.G_COMP * G2Y + src_pix.R_COMP * R2Y), yuv_shift); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
const int x_min = get_global_id(0)*STRIPE_SIZE; |
|
|
|
|
const int x_max = min(x_min + STRIPE_SIZE, cols); |
|
|
|
|
const int y = get_global_id(1); |
|
|
|
|
|
|
|
|
|
if( y < rows ) |
|
|
|
|
{ |
|
|
|
|
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + |
|
|
|
|
mad24(y, srcstep, srcoffset)) + x_min*scn; |
|
|
|
|
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset)); |
|
|
|
|
int x; |
|
|
|
|
for( x = x_min; x < x_max; x++, src += scn ) |
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
dst[x] = src[bidx] * 0.114f + src[1] * 0.587f + src[(bidx^2)] * 0.299f; |
|
|
|
|
#else |
|
|
|
|
dst[x] = (DATA_TYPE)(mad24(src[bidx], B2Y, mad24(src[1], G2Y, |
|
|
|
|
mad24(src[(bidx^2)], R2Y, 1 << (yuv_shift-1)))) >> yuv_shift); |
|
|
|
|
#endif |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset, |
|
|
|
|
__global uchar* dstptr, int dststep, int dstoffset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
const int x = get_global_id(0); |
|
|
|
|
const int y = get_global_id(1); |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); |
|
|
|
|
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); |
|
|
|
@ -158,6 +170,9 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset, |
|
|
|
|
dst[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// |
|
|
|
@ -170,13 +185,18 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); |
|
|
|
|
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); |
|
|
|
|
DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2]; |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, src); |
|
|
|
|
DATA_TYPE b=src_pix.B_COMP, g=src_pix.G_COMP, r=src_pix.R_COMP; |
|
|
|
|
|
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
__constant float * coeffs = c_RGB2YUVCoeffs_f; |
|
|
|
@ -195,6 +215,9 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset, |
|
|
|
|
dst[1] = SAT_CAST( U ); |
|
|
|
|
dst[2] = SAT_CAST( V ); |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f }; |
|
|
|
@ -205,13 +228,18 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); |
|
|
|
|
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); |
|
|
|
|
DATA_TYPE Y = src[0], U = src[1], V = src[2]; |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, src); |
|
|
|
|
DATA_TYPE Y = src_pix.x, U = src_pix.y, V = src_pix.z; |
|
|
|
|
|
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
__constant float * coeffs = c_YUV2RGBCoeffs_f; |
|
|
|
@ -232,6 +260,9 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset, |
|
|
|
|
dst[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__constant int ITUR_BT_601_CY = 1220542; |
|
|
|
@ -246,9 +277,13 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows / 2 && x < cols / 2 ) |
|
|
|
|
if (x < cols / 2) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows / 2 ) |
|
|
|
|
{ |
|
|
|
|
__global const uchar* ysrc = srcptr + mad24(y << 1, srcstep, (x << 1) + srcoffset); |
|
|
|
|
__global const uchar* usrc = srcptr + mad24(rows + y, srcstep, (x << 1) + srcoffset); |
|
|
|
@ -299,6 +334,9 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff |
|
|
|
|
dst2[7] = 255; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// |
|
|
|
@ -311,13 +349,18 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset + x * scnbytes)); |
|
|
|
|
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset + x * dcnbytes)); |
|
|
|
|
DATA_TYPE b=src[bidx], g=src[1], r=src[bidx^2]; |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, src); |
|
|
|
|
DATA_TYPE b=src_pix.B_COMP, g=src_pix.G_COMP, r=src_pix.R_COMP; |
|
|
|
|
|
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
__constant float * coeffs = c_RGB2YCrCbCoeffs_f; |
|
|
|
@ -336,6 +379,9 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset |
|
|
|
|
dst[1] = SAT_CAST( Cr ); |
|
|
|
|
dst[2] = SAT_CAST( Cb ); |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f }; |
|
|
|
@ -346,16 +392,21 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
__global const DATA_TYPE * srcptr = (__global const DATA_TYPE*)(src + src_idx); |
|
|
|
|
__global DATA_TYPE * dstptr = (__global DATA_TYPE*)(dst + dst_idx); |
|
|
|
|
|
|
|
|
|
DATA_TYPE y = srcptr[0], cr = srcptr[1], cb = srcptr[2]; |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, srcptr); |
|
|
|
|
DATA_TYPE y = src_pix.x, cr = src_pix.y, cb = src_pix.z; |
|
|
|
|
|
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
__constant float * coeff = c_YCrCb2RGBCoeffs_f; |
|
|
|
@ -376,6 +427,9 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
dstptr[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
///////////////////////////////////// RGB <-> XYZ ////////////////////////////////////// |
|
|
|
@ -385,9 +439,13 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
int rows, int cols, __constant COEFF_TYPE * coeffs) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
|
int dy = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (dy < rows && dx < cols) |
|
|
|
|
if (dx < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (dy < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(dy, src_step, src_offset + dx * scnbytes); |
|
|
|
|
int dst_idx = mad24(dy, dst_step, dst_offset + dx * dcnbytes); |
|
|
|
@ -395,7 +453,8 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_idx); |
|
|
|
|
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_idx); |
|
|
|
|
|
|
|
|
|
DATA_TYPE r = src[0], g = src[1], b = src[2]; |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, src); |
|
|
|
|
DATA_TYPE r = src_pix.x, g = src_pix.y, b = src_pix.z; |
|
|
|
|
|
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
float x = r * coeffs[0] + g * coeffs[1] + b * coeffs[2]; |
|
|
|
@ -410,6 +469,9 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
dst[1] = SAT_CAST(y); |
|
|
|
|
dst[2] = SAT_CAST(z); |
|
|
|
|
} |
|
|
|
|
++dy; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offset, |
|
|
|
@ -417,9 +479,13 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
int rows, int cols, __constant COEFF_TYPE * coeffs) |
|
|
|
|
{ |
|
|
|
|
int dx = get_global_id(0); |
|
|
|
|
int dy = get_global_id(1); |
|
|
|
|
int dy = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (dy < rows && dx < cols) |
|
|
|
|
if (dx < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (dy < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(dy, src_step, src_offset + dx * scnbytes); |
|
|
|
|
int dst_idx = mad24(dy, dst_step, dst_offset + dx * dcnbytes); |
|
|
|
@ -427,7 +493,8 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_idx); |
|
|
|
|
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_idx); |
|
|
|
|
|
|
|
|
|
DATA_TYPE x = src[0], y = src[1], z = src[2]; |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, src); |
|
|
|
|
DATA_TYPE x = src_pix.x, y = src_pix.y, z = src_pix.z; |
|
|
|
|
|
|
|
|
|
#ifdef DEPTH_5 |
|
|
|
|
float b = x * coeffs[0] + y * coeffs[1] + z * coeffs[2]; |
|
|
|
@ -445,6 +512,9 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
dst[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++dy; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
///////////////////////////////////// RGB[A] <-> BGR[A] ////////////////////////////////////// |
|
|
|
@ -454,24 +524,29 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_idx); |
|
|
|
|
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_idx); |
|
|
|
|
DATA_TYPE_4 src_pix = vload4(0, src); |
|
|
|
|
|
|
|
|
|
#ifdef REVERSE |
|
|
|
|
dst[0] = src[2]; |
|
|
|
|
dst[1] = src[1]; |
|
|
|
|
dst[2] = src[0]; |
|
|
|
|
dst[0] = src_pix.z; |
|
|
|
|
dst[1] = src_pix.y; |
|
|
|
|
dst[2] = src_pix.x; |
|
|
|
|
#else |
|
|
|
|
dst[0] = src[0]; |
|
|
|
|
dst[1] = src[1]; |
|
|
|
|
dst[2] = src[2]; |
|
|
|
|
dst[0] = src_pix.x; |
|
|
|
|
dst[1] = src_pix.y; |
|
|
|
|
dst[2] = src_pix.z; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if dcn == 4 |
|
|
|
@ -482,6 +557,9 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset, |
|
|
|
|
#endif |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
///////////////////////////////////// RGB5x5 <-> RGB ////////////////////////////////////// |
|
|
|
@ -491,9 +569,13 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
@ -517,6 +599,9 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset |
|
|
|
|
#endif |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset, |
|
|
|
@ -524,22 +609,30 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
uchar4 src_pix = vload4(0, src + src_idx); |
|
|
|
|
|
|
|
|
|
#if greenbits == 6 |
|
|
|
|
*((__global ushort*)(dst + dst_idx)) = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~3) << 3)|((src[src_idx + (bidx^2)]&~7) << 8)); |
|
|
|
|
*((__global ushort*)(dst + dst_idx)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~3) << 3)|((src_pix.R_COMP&~7) << 8)); |
|
|
|
|
#elif scn == 3 |
|
|
|
|
*((__global ushort*)(dst + dst_idx)) = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~7) << 2)|((src[src_idx + (bidx^2)]&~7) << 7)); |
|
|
|
|
*((__global ushort*)(dst + dst_idx)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)|((src_pix.R_COMP&~7) << 7)); |
|
|
|
|
#else |
|
|
|
|
*((__global ushort*)(dst + dst_idx)) = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~7) << 2)| |
|
|
|
|
((src[src_idx + (bidx^2)]&~7) << 7)|(src[src_idx + 3] ? 0x8000 : 0)); |
|
|
|
|
*((__global ushort*)(dst + dst_idx)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)| |
|
|
|
|
((src_pix.R_COMP&~7) << 7)|(src_pix.w ? 0x8000 : 0)); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
///////////////////////////////////// RGB5x5 <-> Gray ////////////////////////////////////// |
|
|
|
@ -549,9 +642,13 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x); |
|
|
|
@ -567,6 +664,9 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse |
|
|
|
|
((t >> 7) & 0xf8)*R2Y, yuv_shift); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offset, |
|
|
|
@ -574,9 +674,13 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
@ -589,6 +693,9 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse |
|
|
|
|
*((__global ushort*)(dst + dst_idx)) = (ushort)(t|(t << 5)|(t << 10)); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//////////////////////////////////// RGB <-> HSV ////////////////////////////////////// |
|
|
|
@ -608,14 +715,19 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
__constant int * sdiv_table, __constant int * hdiv_table) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
uchar4 src_pix = vload4(0, src + src_idx); |
|
|
|
|
|
|
|
|
|
int b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)]; |
|
|
|
|
int b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP; |
|
|
|
|
int h, s, v = b; |
|
|
|
|
int vmin = b, diff; |
|
|
|
|
int vr, vg; |
|
|
|
@ -639,6 +751,9 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
dst[dst_idx + 1] = (uchar)s; |
|
|
|
|
dst[dst_idx + 2] = (uchar)v; |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
@ -646,14 +761,19 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
uchar4 src_pix = vload4(0, src + src_idx); |
|
|
|
|
|
|
|
|
|
float h = src[src_idx], s = src[src_idx + 1]*(1/255.f), v = src[src_idx + 2]*(1/255.f); |
|
|
|
|
float h = src_pix.x, s = src_pix.y*(1/255.f), v = src_pix.z*(1/255.f); |
|
|
|
|
float b, g, r; |
|
|
|
|
|
|
|
|
|
if (s != 0) |
|
|
|
@ -692,6 +812,9 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
dst[dst_idx + 3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#elif defined DEPTH_5 |
|
|
|
@ -701,17 +824,22 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
__global const float * src = (__global const float *)(srcptr + src_idx); |
|
|
|
|
__global float * dst = (__global float *)(dstptr + dst_idx); |
|
|
|
|
float4 src_pix = vload4(0, src); |
|
|
|
|
|
|
|
|
|
float b = src[bidx], g = src[1], r = src[bidx^2]; |
|
|
|
|
float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP; |
|
|
|
|
float h, s, v; |
|
|
|
|
|
|
|
|
|
float vmin, diff; |
|
|
|
@ -738,6 +866,9 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
dst[1] = s; |
|
|
|
|
dst[2] = v; |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset, |
|
|
|
@ -745,17 +876,22 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
__global const float * src = (__global const float *)(srcptr + src_idx); |
|
|
|
|
__global float * dst = (__global float *)(dstptr + dst_idx); |
|
|
|
|
float4 src_pix = vload4(0, src); |
|
|
|
|
|
|
|
|
|
float h = src[0], s = src[1], v = src[2]; |
|
|
|
|
float h = src_pix.x, s = src_pix.y, v = src_pix.z; |
|
|
|
|
float b, g, r; |
|
|
|
|
|
|
|
|
|
if (s != 0) |
|
|
|
@ -794,6 +930,9 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
dst[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
@ -807,14 +946,19 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
uchar4 src_pix = vload4(0, src + src_idx); |
|
|
|
|
|
|
|
|
|
float b = src[src_idx + bidx]*(1/255.f), g = src[src_idx + 1]*(1/255.f), r = src[src_idx + (bidx^2)]*(1/255.f); |
|
|
|
|
float b = src_pix.B_COMP*(1/255.f), g = src_pix.G_COMP*(1/255.f), r = src_pix.R_COMP*(1/255.f); |
|
|
|
|
float h = 0.f, s = 0.f, l; |
|
|
|
|
float vmin, vmax, diff; |
|
|
|
|
|
|
|
|
@ -846,6 +990,9 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f); |
|
|
|
|
dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f); |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
@ -853,14 +1000,19 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
uchar4 src_pix = vload4(0, src + src_idx); |
|
|
|
|
|
|
|
|
|
float h = src[src_idx], l = src[src_idx + 1]*(1.f/255.f), s = src[src_idx + 2]*(1.f/255.f); |
|
|
|
|
float h = src_pix.x, l = src_pix.y*(1.f/255.f), s = src_pix.z*(1.f/255.f); |
|
|
|
|
float b, g, r; |
|
|
|
|
|
|
|
|
|
if (s != 0) |
|
|
|
@ -898,6 +1050,9 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset, |
|
|
|
|
dst[dst_idx + 3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#elif defined DEPTH_5 |
|
|
|
@ -907,17 +1062,22 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
__global const float * src = (__global const float *)(srcptr + src_idx); |
|
|
|
|
__global float * dst = (__global float *)(dstptr + dst_idx); |
|
|
|
|
float4 src_pix = vload4(0, src); |
|
|
|
|
|
|
|
|
|
float b = src[bidx], g = src[1], r = src[bidx^2]; |
|
|
|
|
float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP; |
|
|
|
|
float h = 0.f, s = 0.f, l; |
|
|
|
|
float vmin, vmax, diff; |
|
|
|
|
|
|
|
|
@ -949,6 +1109,9 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
dst[1] = l; |
|
|
|
|
dst[2] = s; |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset, |
|
|
|
@ -956,17 +1119,22 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
__global const float * src = (__global const float *)(srcptr + src_idx); |
|
|
|
|
__global float * dst = (__global float *)(dstptr + dst_idx); |
|
|
|
|
float4 src_pix = vload4(0, src); |
|
|
|
|
|
|
|
|
|
float h = src[0], l = src[1], s = src[2]; |
|
|
|
|
float h = src_pix.x, l = src_pix.y, s = src_pix.z; |
|
|
|
|
float b, g, r; |
|
|
|
|
|
|
|
|
|
if (s != 0) |
|
|
|
@ -1005,6 +1173,9 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset |
|
|
|
|
dst[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
@ -1018,22 +1189,29 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
x <<= 2; |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x); |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + (x << 2)); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2)); |
|
|
|
|
uchar4 src_pix = vload4(0, src + src_idx); |
|
|
|
|
|
|
|
|
|
uchar v0 = src[src_idx], v1 = src[src_idx + 1]; |
|
|
|
|
uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3]; |
|
|
|
|
uchar v0 = src_pix.x, v1 = src_pix.y; |
|
|
|
|
uchar v2 = src_pix.z, v3 = src_pix.w; |
|
|
|
|
|
|
|
|
|
dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM; |
|
|
|
|
dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM; |
|
|
|
|
dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM; |
|
|
|
|
dst[dst_idx + 3] = v3; |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
__kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset, |
|
|
|
@ -1041,16 +1219,20 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset |
|
|
|
|
int rows, int cols) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
x <<= 2; |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x); |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + (x << 2)); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2)); |
|
|
|
|
uchar4 src_pix = vload4(0, src + src_idx); |
|
|
|
|
|
|
|
|
|
uchar v0 = src[src_idx], v1 = src[src_idx + 1]; |
|
|
|
|
uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3]; |
|
|
|
|
uchar v0 = src_pix.x, v1 = src_pix.y; |
|
|
|
|
uchar v2 = src_pix.z, v3 = src_pix.w; |
|
|
|
|
uchar v3_half = v3 / 2; |
|
|
|
|
|
|
|
|
|
dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3; |
|
|
|
@ -1058,6 +1240,9 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset |
|
|
|
|
dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3; |
|
|
|
|
dst[dst_idx + 3] = v3; |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
@ -1086,21 +1271,26 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset, |
|
|
|
|
__constant int * coeffs, int Lscale, int Lshift) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
src += src_idx; |
|
|
|
|
dst += dst_idx; |
|
|
|
|
__global const uchar* src_ptr = src + src_idx; |
|
|
|
|
__global uchar* dst_ptr = dst + dst_idx; |
|
|
|
|
uchar4 src_pix = vload4(0, src_ptr); |
|
|
|
|
|
|
|
|
|
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], |
|
|
|
|
C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], |
|
|
|
|
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; |
|
|
|
|
|
|
|
|
|
int R = gammaTab[src[0]], G = gammaTab[src[1]], B = gammaTab[src[2]]; |
|
|
|
|
int R = gammaTab[src_pix.x], G = gammaTab[src_pix.y], B = gammaTab[src_pix.z]; |
|
|
|
|
int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)]; |
|
|
|
|
int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)]; |
|
|
|
|
int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)]; |
|
|
|
@ -1109,9 +1299,12 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset, |
|
|
|
|
int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 ); |
|
|
|
|
int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 ); |
|
|
|
|
|
|
|
|
|
dst[0] = SAT_CAST(L); |
|
|
|
|
dst[1] = SAT_CAST(a); |
|
|
|
|
dst[2] = SAT_CAST(b); |
|
|
|
|
dst_ptr[0] = SAT_CAST(L); |
|
|
|
|
dst_ptr[1] = SAT_CAST(a); |
|
|
|
|
dst_ptr[2] = SAT_CAST(b); |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -1125,23 +1318,28 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
__constant float * coeffs, float _1_3, float _a) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
__global const float * src = (__global const float *)(srcptr + src_idx); |
|
|
|
|
__global float * dst = (__global float *)(dstptr + dst_idx); |
|
|
|
|
float4 src_pix = vload4(0, src); |
|
|
|
|
|
|
|
|
|
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], |
|
|
|
|
C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], |
|
|
|
|
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; |
|
|
|
|
|
|
|
|
|
float R = clamp(src[0], 0.0f, 1.0f); |
|
|
|
|
float G = clamp(src[1], 0.0f, 1.0f); |
|
|
|
|
float B = clamp(src[2], 0.0f, 1.0f); |
|
|
|
|
float R = clamp(src_pix.x, 0.0f, 1.0f); |
|
|
|
|
float G = clamp(src_pix.y, 0.0f, 1.0f); |
|
|
|
|
float B = clamp(src_pix.z, 0.0f, 1.0f); |
|
|
|
|
|
|
|
|
|
#ifdef SRGB |
|
|
|
|
R = splineInterpolate(R * GammaTabScale, gammaTab, GAMMA_TAB_SIZE); |
|
|
|
@ -1165,6 +1363,9 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
dst[1] = a; |
|
|
|
|
dst[2] = b; |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
@ -1225,20 +1426,25 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset, |
|
|
|
|
__constant float * coeffs, float lThresh, float fThresh) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
src += src_idx; |
|
|
|
|
dst += dst_idx; |
|
|
|
|
__global const uchar* src_ptr = src + src_idx; |
|
|
|
|
__global uchar* dst_ptr = dst + dst_idx; |
|
|
|
|
uchar4 src_pix = vload4(0, src_ptr); |
|
|
|
|
|
|
|
|
|
float srcbuf[3], dstbuf[3]; |
|
|
|
|
srcbuf[0] = src[0]*(100.f/255.f); |
|
|
|
|
srcbuf[1] = convert_float(src[1] - 128); |
|
|
|
|
srcbuf[2] = convert_float(src[2] - 128); |
|
|
|
|
srcbuf[0] = src_pix.x*(100.f/255.f); |
|
|
|
|
srcbuf[1] = convert_float(src_pix.y - 128); |
|
|
|
|
srcbuf[2] = convert_float(src_pix.z - 128); |
|
|
|
|
|
|
|
|
|
Lab2BGR_f(&srcbuf[0], &dstbuf[0], |
|
|
|
|
#ifdef SRGB |
|
|
|
@ -1246,13 +1452,16 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset, |
|
|
|
|
#endif |
|
|
|
|
coeffs, lThresh, fThresh); |
|
|
|
|
|
|
|
|
|
dst[0] = SAT_CAST(dstbuf[0] * 255.0f); |
|
|
|
|
dst[1] = SAT_CAST(dstbuf[1] * 255.0f); |
|
|
|
|
dst[2] = SAT_CAST(dstbuf[2] * 255.0f); |
|
|
|
|
dst_ptr[0] = SAT_CAST(dstbuf[0] * 255.0f); |
|
|
|
|
dst_ptr[1] = SAT_CAST(dstbuf[1] * 255.0f); |
|
|
|
|
dst_ptr[2] = SAT_CAST(dstbuf[2] * 255.0f); |
|
|
|
|
#if dcn == 4 |
|
|
|
|
dst[3] = MAX_NUM; |
|
|
|
|
dst_ptr[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#elif defined DEPTH_5 |
|
|
|
@ -1265,18 +1474,23 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
__constant float * coeffs, float lThresh, float fThresh) |
|
|
|
|
{ |
|
|
|
|
int x = get_global_id(0); |
|
|
|
|
int y = get_global_id(1); |
|
|
|
|
int y = get_global_id(1) * PIX_PER_WI_Y; |
|
|
|
|
|
|
|
|
|
if (y < rows && x < cols) |
|
|
|
|
if (x < cols) |
|
|
|
|
{ |
|
|
|
|
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) |
|
|
|
|
{ |
|
|
|
|
if (y < rows) |
|
|
|
|
{ |
|
|
|
|
int src_idx = mad24(y, src_step, src_offset + x * scnbytes); |
|
|
|
|
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes); |
|
|
|
|
|
|
|
|
|
__global const float * src = (__global const float *)(srcptr + src_idx); |
|
|
|
|
__global float * dst = (__global float *)(dstptr + dst_idx); |
|
|
|
|
float4 src_pix = vload4(0, src); |
|
|
|
|
|
|
|
|
|
float srcbuf[3], dstbuf[3]; |
|
|
|
|
srcbuf[0] = src[0], srcbuf[1] = src[1], srcbuf[2] = src[2]; |
|
|
|
|
srcbuf[0] = src_pix.x, srcbuf[1] = src_pix.y, srcbuf[2] = src_pix.z; |
|
|
|
|
|
|
|
|
|
Lab2BGR_f(&srcbuf[0], &dstbuf[0], |
|
|
|
|
#ifdef SRGB |
|
|
|
@ -1289,6 +1503,9 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse |
|
|
|
|
dst[3] = MAX_NUM; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
++y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|