|
|
|
@ -70,21 +70,47 @@ |
|
|
|
|
#define CV_PI M_PI_F |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// dstelem / dstelem2: the destination element, reached by casting
// (dstptr + dst_index) resp. (dstptr2 + dst_index2) to __global dstT* and
// dereferencing. The pointer and index names must be in scope where the
// macro is expanded.
#define dstelem *(__global dstT*)(dstptr + dst_index) |
|
|
|
|
#define dstelem2 *(__global dstT*)(dstptr2 + dst_index2) |
|
|
|
|
// Fallback: if cn has not been defined before this point, define it as 1.
#ifndef cn |
|
|
|
|
#define cn 1 |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// When cn == 1, discard any previous srcT1_C1 / srcT2_C1 / dstT_C1 definitions
// and make them aliases of srcT1 / srcT2 / dstT respectively.
#if cn == 1 |
|
|
|
|
#undef srcT1_C1 |
|
|
|
|
#undef srcT2_C1 |
|
|
|
|
#undef dstT_C1 |
|
|
|
|
#define srcT1_C1 srcT1 |
|
|
|
|
#define srcT2_C1 srcT2 |
|
|
|
|
#define dstT_C1 dstT |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// storedst(val) / storedst2(val): write val to the destination at
// dstptr + dst_index (resp. dstptr2 + dst_index2).
//   cn != 3: plain assignment through a __global dstT* cast.
//   cn == 3: vstore3 with offset 0 through a __global dstT_C1* cast.
#if cn != 3 |
|
|
|
|
#define storedst(val) *(__global dstT*)(dstptr + dst_index) = val |
|
|
|
|
#define storedst2(val) *(__global dstT*)(dstptr2 + dst_index2) = val |
|
|
|
|
#else |
|
|
|
|
#define storedst(val) vstore3(val, 0, (__global dstT_C1*)(dstptr + dst_index)) |
|
|
|
|
#define storedst2(val) vstore3(val, 0, (__global dstT_C1*)(dstptr2 + dst_index2)) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// Object-like macro with an empty expansion; convertToDT falls back to this
// name further down when no conversion has been defined.
#define noconvert |
|
|
|
|
|
|
|
|
|
#ifndef workT |
|
|
|
|
|
|
|
|
|
#ifndef srcT1 |
|
|
|
|
#define srcT1 dstT |
|
|
|
|
#define srcT1_C1 dstT_C1 |
|
|
|
|
#endif |
|
|
|
|
#ifndef srcT2 |
|
|
|
|
#define srcT2 dstT |
|
|
|
|
#define srcT2_C1 dstT_C1 |
|
|
|
|
#endif |
|
|
|
|
#define workT dstT |
|
|
|
|
#if cn != 3 |
|
|
|
|
#define srcelem1 *(__global srcT1*)(srcptr1 + src1_index) |
|
|
|
|
#define srcelem2 *(__global srcT2*)(srcptr2 + src2_index) |
|
|
|
|
#else |
|
|
|
|
#define srcelem1 vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index)) |
|
|
|
|
#define srcelem2 vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index)) |
|
|
|
|
#endif |
|
|
|
|
#ifndef convertToDT |
|
|
|
|
#define convertToDT noconvert |
|
|
|
|
#endif |
|
|
|
@ -94,153 +120,168 @@ |
|
|
|
|
#ifndef convertToWT2 |
|
|
|
|
#define convertToWT2 convertToWT1 |
|
|
|
|
#endif |
|
|
|
|
#if cn != 3 |
|
|
|
|
#define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index)) |
|
|
|
|
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index)) |
|
|
|
|
#else |
|
|
|
|
#define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index))) |
|
|
|
|
#define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index))) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// Fallback: if workST has not been defined, make it an alias of workT.
#ifndef workST |
|
|
|
|
#define workST workT |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// Both hooks start out empty; the operation-specific sections that follow
// #undef and redefine them where an operation needs extra arguments
// (EXTRA_PARAMS) or an extra index computation (EXTRA_INDEX).
#define EXTRA_PARAMS |
|
|
|
|
#define EXTRA_INDEX |
|
|
|
|
|
|
|
|
|
#if defined OP_ADD |
|
|
|
|
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(convertToDT(srcelem1 + srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_SUB |
|
|
|
|
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 - srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(convertToDT(srcelem1 - srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_RSUB |
|
|
|
|
#define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1) |
|
|
|
|
#define PROCESS_ELEM storedst(convertToDT(srcelem2 - srcelem1)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_ABSDIFF |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT v = srcelem1 - srcelem2; \ |
|
|
|
|
dstelem = convertToDT(v >= (workT)(0) ? v : -v); |
|
|
|
|
storedst(convertToDT(v >= (workT)(0) ? v : -v)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_AND |
|
|
|
|
#define PROCESS_ELEM dstelem = srcelem1 & srcelem2 |
|
|
|
|
#define PROCESS_ELEM storedst(srcelem1 & srcelem2) |
|
|
|
|
|
|
|
|
|
#elif defined OP_OR |
|
|
|
|
#define PROCESS_ELEM dstelem = srcelem1 | srcelem2 |
|
|
|
|
#define PROCESS_ELEM storedst(srcelem1 | srcelem2) |
|
|
|
|
|
|
|
|
|
#elif defined OP_XOR |
|
|
|
|
#define PROCESS_ELEM dstelem = srcelem1 ^ srcelem2 |
|
|
|
|
#define PROCESS_ELEM storedst(srcelem1 ^ srcelem2) |
|
|
|
|
|
|
|
|
|
#elif defined OP_NOT |
|
|
|
|
#define PROCESS_ELEM dstelem = ~srcelem1 |
|
|
|
|
#define PROCESS_ELEM storedst(~srcelem1) |
|
|
|
|
|
|
|
|
|
#elif defined OP_MIN |
|
|
|
|
#define PROCESS_ELEM dstelem = min(srcelem1, srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(min(srcelem1, srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_MAX |
|
|
|
|
#define PROCESS_ELEM dstelem = max(srcelem1, srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(max(srcelem1, srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_MUL |
|
|
|
|
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_MUL_SCALE |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#ifdef UNARY_OP |
|
|
|
|
#define EXTRA_PARAMS , workT srcelem2, scaleT scale |
|
|
|
|
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale |
|
|
|
|
#undef srcelem2 |
|
|
|
|
#define srcelem2 srcelem2_ |
|
|
|
|
#else |
|
|
|
|
#define EXTRA_PARAMS , scaleT scale |
|
|
|
|
#endif |
|
|
|
|
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * scale * srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_DIV |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT e2 = srcelem2, zero = (workT)(0); \ |
|
|
|
|
dstelem = convertToDT(e2 != zero ? srcelem1 / e2 : zero) |
|
|
|
|
storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_DIV_SCALE |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#ifdef UNARY_OP |
|
|
|
|
#define EXTRA_PARAMS , workT srcelem2, scaleT scale |
|
|
|
|
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale |
|
|
|
|
#undef srcelem2 |
|
|
|
|
#define srcelem2 srcelem2_ |
|
|
|
|
#else |
|
|
|
|
#define EXTRA_PARAMS , scaleT scale |
|
|
|
|
#endif |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT e2 = srcelem2, zero = (workT)(0); \ |
|
|
|
|
dstelem = convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)) |
|
|
|
|
storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2))) |
|
|
|
|
|
|
|
|
|
#elif defined OP_RDIV_SCALE |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#ifdef UNARY_OP |
|
|
|
|
#define EXTRA_PARAMS , workT srcelem2, scaleT scale |
|
|
|
|
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale |
|
|
|
|
#undef srcelem2 |
|
|
|
|
#define srcelem2 srcelem2_ |
|
|
|
|
#else |
|
|
|
|
#define EXTRA_PARAMS , scaleT scale |
|
|
|
|
#endif |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT e1 = srcelem1, zero = (workT)(0); \ |
|
|
|
|
dstelem = convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)) |
|
|
|
|
storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1))) |
|
|
|
|
|
|
|
|
|
#elif defined OP_RECIP_SCALE |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#define EXTRA_PARAMS , scaleT scale |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT e1 = srcelem1, zero = (workT)(0); \ |
|
|
|
|
dstelem = convertToDT(e1 != zero ? scale / e1 : zero) |
|
|
|
|
storedst(convertToDT(e1 != zero ? scale / e1 : zero)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_ADDW |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma |
|
|
|
|
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma) |
|
|
|
|
#define PROCESS_ELEM storedst(convertToDT(srcelem1*alpha + srcelem2*beta + gamma)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_MAG |
|
|
|
|
#define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(hypot(srcelem1, srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_ABS_NOSAT |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
dstT v = convertToDT(srcelem1); \ |
|
|
|
|
dstelem = v >= 0 ? v : -v |
|
|
|
|
storedst(v >= 0 ? v : -v) |
|
|
|
|
|
|
|
|
|
#elif defined OP_PHASE_RADIANS |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT tmp = atan2(srcelem2, srcelem1); \ |
|
|
|
|
if(tmp < 0) tmp += 6.283185307179586232f; \ |
|
|
|
|
dstelem = tmp |
|
|
|
|
storedst(tmp) |
|
|
|
|
|
|
|
|
|
#elif defined OP_PHASE_DEGREES |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465f; \ |
|
|
|
|
if(tmp < 0) tmp += 360; \ |
|
|
|
|
dstelem = tmp |
|
|
|
|
storedst(tmp) |
|
|
|
|
|
|
|
|
|
#elif defined OP_EXP |
|
|
|
|
#define PROCESS_ELEM dstelem = exp(srcelem1) |
|
|
|
|
#define PROCESS_ELEM storedst(exp(srcelem1)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_POW |
|
|
|
|
#define PROCESS_ELEM dstelem = pow(srcelem1, srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(pow(srcelem1, srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_POWN |
|
|
|
|
#undef workT |
|
|
|
|
#define workT int |
|
|
|
|
#define PROCESS_ELEM dstelem = pown(srcelem1, srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(pown(srcelem1, srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_SQRT |
|
|
|
|
#define PROCESS_ELEM dstelem = sqrt(srcelem1) |
|
|
|
|
#define PROCESS_ELEM storedst(sqrt(srcelem1)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_LOG |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
dstT v = (dstT)(srcelem1);\ |
|
|
|
|
dstelem = v > (dstT)(0) ? log(v) : log(-v) |
|
|
|
|
dstT v = (dstT)(srcelem1);\ |
|
|
|
|
storedst(v > (dstT)(0) ? log(v) : log(-v)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_CMP |
|
|
|
|
#define dstT uchar |
|
|
|
|
#define srcT2 srcT1 |
|
|
|
|
#define convertToWT1 |
|
|
|
|
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0) |
|
|
|
|
#define PROCESS_ELEM storedst(convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_CONVERT_SCALE_ABS |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#define EXTRA_PARAMS , workT alpha, workT beta |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
workT value = srcelem1 * alpha + beta; \ |
|
|
|
|
dstelem = convertToDT(value >= 0 ? value : -value) |
|
|
|
|
storedst(convertToDT(value >= 0 ? value : -value)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_SCALE_ADD |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#define EXTRA_PARAMS , workT alpha |
|
|
|
|
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * alpha + srcelem2) |
|
|
|
|
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * alpha + srcelem2)) |
|
|
|
|
|
|
|
|
|
#elif defined OP_CTP_AD || defined OP_CTP_AR |
|
|
|
|
#ifdef OP_CTP_AD |
|
|
|
@ -257,8 +298,8 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) |
|
|
|
|
dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \ |
|
|
|
|
dstT cartToPolar = y2 <= x2 ? x * y / (x2 + 0.28f * y2 + CV_EPSILON) + tmp : (tmp1 - x * y / (y2 + 0.28f * x2 + CV_EPSILON)); \ |
|
|
|
|
TO_DEGREE \ |
|
|
|
|
dstelem = magnitude; \ |
|
|
|
|
dstelem2 = cartToPolar |
|
|
|
|
storedst(magnitude); \ |
|
|
|
|
storedst2(cartToPolar) |
|
|
|
|
|
|
|
|
|
#elif defined OP_PTC_AD || defined OP_PTC_AR |
|
|
|
|
#ifdef OP_PTC_AD |
|
|
|
@ -272,15 +313,15 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
dstT x = srcelem1, y = srcelem2; \ |
|
|
|
|
FROM_DEGREE; \ |
|
|
|
|
dstelem = cos(alpha) * x; \ |
|
|
|
|
dstelem2 = sin(alpha) * x |
|
|
|
|
storedst(cos(alpha) * x); \ |
|
|
|
|
storedst2(sin(alpha) * x) |
|
|
|
|
|
|
|
|
|
#elif defined OP_PATCH_NANS |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#define EXTRA_PARAMS , int val |
|
|
|
|
#define PROCESS_ELEM \ |
|
|
|
|
if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \ |
|
|
|
|
dstelem = val |
|
|
|
|
storedst(val) |
|
|
|
|
|
|
|
|
|
#else |
|
|
|
|
#error "unknown op type" |
|
|
|
@ -290,18 +331,26 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
#define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2 |
|
|
|
|
#undef EXTRA_INDEX |
|
|
|
|
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, x*(int)sizeof(dstT) + dstoffset2) |
|
|
|
|
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, x*(int)sizeof(dstT_C1)*cn + dstoffset2) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// Unary variants (UNARY_OP / MASK_UNARY_OP): srcelem2 is no longer the
// memory-load macro. It is undefined here and, for the two-operand operations
// listed below, redirected to a value named in EXTRA_PARAMS (presumably
// appended to the kernel parameter list -- the use site is not visible here).
#if defined UNARY_OP || defined MASK_UNARY_OP |
|
|
|
|
#undef srcelem2 |
|
|
|
|
|
|
|
|
|
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \ |
|
|
|
|
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \ |
|
|
|
|
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \ |
|
|
|
|
defined OP_MUL || defined OP_DIV || defined OP_POWN |
|
|
|
|
#undef EXTRA_PARAMS |
|
|
|
|
// NOTE(review): EXTRA_PARAMS is #defined twice in a row below with different
// expansions and no #undef in between. This looks like the before/after pair
// of a change (workT srcelem2 -> workST srcelem2_); confirm which is intended.
#define EXTRA_PARAMS , workT srcelem2 |
|
|
|
|
#define EXTRA_PARAMS , workST srcelem2_ |
|
|
|
|
#undef srcelem2 |
|
|
|
|
#define srcelem2 srcelem2_ |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
// For cn == 3, srcelem2 is rebuilt as (workT)(x, y, z) from the .x/.y/.z
// components of srcelem2_ (presumably workT is a 3-component vector type in
// this configuration -- TODO confirm).
#if cn == 3 |
|
|
|
|
#undef srcelem2 |
|
|
|
|
#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z) |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if defined BINARY_OP |
|
|
|
@ -316,11 +365,11 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, |
|
|
|
|
|
|
|
|
|
if (x < cols && y < rows) |
|
|
|
|
{ |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1); |
|
|
|
|
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT)) |
|
|
|
|
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2) + srcoffset2); |
|
|
|
|
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2_C1)*cn + srcoffset2); |
|
|
|
|
#endif |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset); |
|
|
|
|
EXTRA_INDEX; |
|
|
|
|
|
|
|
|
|
PROCESS_ELEM; |
|
|
|
@ -343,9 +392,9 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, |
|
|
|
|
int mask_index = mad24(y, maskstep, x + maskoffset); |
|
|
|
|
if( mask[mask_index] ) |
|
|
|
|
{ |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); |
|
|
|
|
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2) + srcoffset2); |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1); |
|
|
|
|
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2_C1)*cn + srcoffset2); |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset); |
|
|
|
|
|
|
|
|
|
PROCESS_ELEM; |
|
|
|
|
} |
|
|
|
@ -363,9 +412,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, |
|
|
|
|
|
|
|
|
|
if (x < cols && y < rows) |
|
|
|
|
{ |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); |
|
|
|
|
EXTRA_INDEX; |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1); |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset); |
|
|
|
|
|
|
|
|
|
PROCESS_ELEM; |
|
|
|
|
} |
|
|
|
@ -386,8 +434,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, |
|
|
|
|
int mask_index = mad24(y, maskstep, x + maskoffset); |
|
|
|
|
if( mask[mask_index] ) |
|
|
|
|
{ |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); |
|
|
|
|
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1); |
|
|
|
|
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset); |
|
|
|
|
|
|
|
|
|
PROCESS_ELEM; |
|
|
|
|
} |
|
|
|
|