added 3-channel support to arithmetic operations

pull/2326/head
Vadim Pisarevsky 11 years ago
parent 7a35f4593e
commit f7620dc7d1
  1. 34
      modules/core/src/arithm.cpp
  2. 164
      modules/core/src/opencl/arithm.cl

@ -934,16 +934,23 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if( oclop < 0 || ((haveMask || haveScalar) && (cn > 4 || cn == 3)) || if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) ||
(!doubleSupport && srcdepth == CV_64F)) (!doubleSupport && srcdepth == CV_64F))
return false; return false;
char opts[1024]; char opts[1024];
int kercn = haveMask || haveScalar ? cn : 1; int kercn = haveMask || haveScalar ? cn : 1;
sprintf(opts, "-D %s%s -D %s -D dstT=%s%s", int scalarcn = kercn == 3 ? 4 : kercn;
sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop], (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), oclop2str[oclop],
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) : bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "",
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) :
ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)),
bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) :
ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)),
kercn);
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
if( k.empty() ) if( k.empty() )
@ -960,7 +967,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if( haveScalar ) if( haveScalar )
{ {
size_t esz = CV_ELEM_SIZE(srctype); size_t esz = CV_ELEM_SIZE1(srctype)*scalarcn;
double buf[4] = {0,0,0,0}; double buf[4] = {0,0,0,0};
if( oclop != OCL_OP_NOT ) if( oclop != OCL_OP_NOT )
@ -1294,7 +1301,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
bool haveMask = !_mask.empty(); bool haveMask = !_mask.empty();
if( ((haveMask || haveScalar) && (cn > 4 || cn == 3)) ) if( ((haveMask || haveScalar) && cn > 4) )
return false; return false;
int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype)); int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype));
@ -1307,21 +1314,26 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
return false; return false;
int kercn = haveMask || haveScalar ? cn : 1; int kercn = haveMask || haveScalar ? cn : 1;
int scalarcn = kercn == 3 ? 4 : kercn;
char cvtstr[4][32], opts[1024]; char cvtstr[4][32], opts[1024];
sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s " sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
"-D dstT=%s -D workT=%s -D scaleT=%s -D convertToWT1=%s " "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s", "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
ocl::typeToStr(CV_MAKETYPE(depth1, 1)),
ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
ocl::typeToStr(CV_MAKETYPE(depth2, 1)),
ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(ddepth, 1)),
ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, 1)), ocl::typeToStr(CV_MAKETYPE(wdepth, 1)),
ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""); doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn);
size_t usrdata_esz = CV_ELEM_SIZE(wdepth); size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
const uchar* usrdata_p = (const uchar*)usrdata; const uchar* usrdata_p = (const uchar*)usrdata;
@ -1352,7 +1364,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if( haveScalar ) if( haveScalar )
{ {
size_t esz = CV_ELEM_SIZE(wtype); size_t esz = CV_ELEM_SIZE(wtype)*scalarcn;
double buf[4]={0,0,0,0}; double buf[4]={0,0,0,0};
Mat src2sc = _src2.getMat(); Mat src2sc = _src2.getMat();
@ -2621,7 +2633,7 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D BINARY_OP -D srcT1=%s -D workT=srcT1" format("-D BINARY_OP -D srcT1=%s -D workT=srcT1 -D cn=1"
" -D OP_CMP -D CMP_OPERATOR=%s%s", " -D OP_CMP -D CMP_OPERATOR=%s%s",
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
operationMap[op], operationMap[op],

@ -70,21 +70,47 @@
#define CV_PI M_PI_F #define CV_PI M_PI_F
#endif #endif
#define dstelem *(__global dstT*)(dstptr + dst_index) #ifndef cn
#define dstelem2 *(__global dstT*)(dstptr2 + dst_index2) #define cn 1
#endif
#if cn == 1
#undef srcT1_C1
#undef srcT2_C1
#undef dstT_C1
#define srcT1_C1 srcT1
#define srcT2_C1 srcT2
#define dstT_C1 dstT
#endif
#if cn != 3
#define storedst(val) *(__global dstT*)(dstptr + dst_index) = val
#define storedst2(val) *(__global dstT*)(dstptr2 + dst_index2) = val
#else
#define storedst(val) vstore3(val, 0, (__global dstT_C1*)(dstptr + dst_index))
#define storedst2(val) vstore3(val, 0, (__global dstT_C1*)(dstptr2 + dst_index2))
#endif
#define noconvert #define noconvert
#ifndef workT #ifndef workT
#ifndef srcT1 #ifndef srcT1
#define srcT1 dstT #define srcT1 dstT
#define srcT1_C1 dstT_C1
#endif #endif
#ifndef srcT2 #ifndef srcT2
#define srcT2 dstT #define srcT2 dstT
#define srcT2_C1 dstT_C1
#endif #endif
#define workT dstT #define workT dstT
#define srcelem1 *(__global srcT1*)(srcptr1 + src1_index) #if cn != 3
#define srcelem2 *(__global srcT2*)(srcptr2 + src2_index) #define srcelem1 *(__global srcT1*)(srcptr1 + src1_index)
#define srcelem2 *(__global srcT2*)(srcptr2 + src2_index)
#else
#define srcelem1 vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index))
#define srcelem2 vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index))
#endif
#ifndef convertToDT #ifndef convertToDT
#define convertToDT noconvert #define convertToDT noconvert
#endif #endif
@ -94,153 +120,168 @@
#ifndef convertToWT2 #ifndef convertToWT2
#define convertToWT2 convertToWT1 #define convertToWT2 convertToWT1
#endif #endif
#define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index)) #if cn != 3
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index)) #define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
#else
#define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index)))
#define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index)))
#endif
#endif
#ifndef workST
#define workST workT
#endif #endif
#define EXTRA_PARAMS #define EXTRA_PARAMS
#define EXTRA_INDEX #define EXTRA_INDEX
#if defined OP_ADD #if defined OP_ADD
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2) #define PROCESS_ELEM storedst(convertToDT(srcelem1 + srcelem2))
#elif defined OP_SUB #elif defined OP_SUB
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 - srcelem2) #define PROCESS_ELEM storedst(convertToDT(srcelem1 - srcelem2))
#elif defined OP_RSUB #elif defined OP_RSUB
#define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1) #define PROCESS_ELEM storedst(convertToDT(srcelem2 - srcelem1))
#elif defined OP_ABSDIFF #elif defined OP_ABSDIFF
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT v = srcelem1 - srcelem2; \ workT v = srcelem1 - srcelem2; \
dstelem = convertToDT(v >= (workT)(0) ? v : -v); storedst(convertToDT(v >= (workT)(0) ? v : -v))
#elif defined OP_AND #elif defined OP_AND
#define PROCESS_ELEM dstelem = srcelem1 & srcelem2 #define PROCESS_ELEM storedst(srcelem1 & srcelem2)
#elif defined OP_OR #elif defined OP_OR
#define PROCESS_ELEM dstelem = srcelem1 | srcelem2 #define PROCESS_ELEM storedst(srcelem1 | srcelem2)
#elif defined OP_XOR #elif defined OP_XOR
#define PROCESS_ELEM dstelem = srcelem1 ^ srcelem2 #define PROCESS_ELEM storedst(srcelem1 ^ srcelem2)
#elif defined OP_NOT #elif defined OP_NOT
#define PROCESS_ELEM dstelem = ~srcelem1 #define PROCESS_ELEM storedst(~srcelem1)
#elif defined OP_MIN #elif defined OP_MIN
#define PROCESS_ELEM dstelem = min(srcelem1, srcelem2) #define PROCESS_ELEM storedst(min(srcelem1, srcelem2))
#elif defined OP_MAX #elif defined OP_MAX
#define PROCESS_ELEM dstelem = max(srcelem1, srcelem2) #define PROCESS_ELEM storedst(max(srcelem1, srcelem2))
#elif defined OP_MUL #elif defined OP_MUL
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2) #define PROCESS_ELEM storedst(convertToDT(srcelem1 * srcelem2))
#elif defined OP_MUL_SCALE #elif defined OP_MUL_SCALE
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#ifdef UNARY_OP #ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale #define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
#else #else
#define EXTRA_PARAMS , scaleT scale #define EXTRA_PARAMS , scaleT scale
#endif #endif
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * scale * srcelem2) #define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2))
#elif defined OP_DIV #elif defined OP_DIV
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \ workT e2 = srcelem2, zero = (workT)(0); \
dstelem = convertToDT(e2 != zero ? srcelem1 / e2 : zero) storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero))
#elif defined OP_DIV_SCALE #elif defined OP_DIV_SCALE
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#ifdef UNARY_OP #ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale #define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
#else #else
#define EXTRA_PARAMS , scaleT scale #define EXTRA_PARAMS , scaleT scale
#endif #endif
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \ workT e2 = srcelem2, zero = (workT)(0); \
dstelem = convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)) storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)))
#elif defined OP_RDIV_SCALE #elif defined OP_RDIV_SCALE
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#ifdef UNARY_OP #ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale #define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
#else #else
#define EXTRA_PARAMS , scaleT scale #define EXTRA_PARAMS , scaleT scale
#endif #endif
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \ workT e1 = srcelem1, zero = (workT)(0); \
dstelem = convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)) storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)))
#elif defined OP_RECIP_SCALE #elif defined OP_RECIP_SCALE
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , scaleT scale #define EXTRA_PARAMS , scaleT scale
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \ workT e1 = srcelem1, zero = (workT)(0); \
dstelem = convertToDT(e1 != zero ? scale / e1 : zero) storedst(convertToDT(e1 != zero ? scale / e1 : zero))
#elif defined OP_ADDW #elif defined OP_ADDW
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma #define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma) #define PROCESS_ELEM storedst(convertToDT(srcelem1*alpha + srcelem2*beta + gamma))
#elif defined OP_MAG #elif defined OP_MAG
#define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2) #define PROCESS_ELEM storedst(hypot(srcelem1, srcelem2))
#elif defined OP_ABS_NOSAT #elif defined OP_ABS_NOSAT
#define PROCESS_ELEM \ #define PROCESS_ELEM \
dstT v = convertToDT(srcelem1); \ dstT v = convertToDT(srcelem1); \
dstelem = v >= 0 ? v : -v storedst(v >= 0 ? v : -v)
#elif defined OP_PHASE_RADIANS #elif defined OP_PHASE_RADIANS
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT tmp = atan2(srcelem2, srcelem1); \ workT tmp = atan2(srcelem2, srcelem1); \
if(tmp < 0) tmp += 6.283185307179586232f; \ if(tmp < 0) tmp += 6.283185307179586232f; \
dstelem = tmp storedst(tmp)
#elif defined OP_PHASE_DEGREES #elif defined OP_PHASE_DEGREES
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465f; \ workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465f; \
if(tmp < 0) tmp += 360; \ if(tmp < 0) tmp += 360; \
dstelem = tmp storedst(tmp)
#elif defined OP_EXP #elif defined OP_EXP
#define PROCESS_ELEM dstelem = exp(srcelem1) #define PROCESS_ELEM storedst(exp(srcelem1))
#elif defined OP_POW #elif defined OP_POW
#define PROCESS_ELEM dstelem = pow(srcelem1, srcelem2) #define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
#elif defined OP_POWN #elif defined OP_POWN
#undef workT #undef workT
#define workT int #define workT int
#define PROCESS_ELEM dstelem = pown(srcelem1, srcelem2) #define PROCESS_ELEM storedst(pown(srcelem1, srcelem2))
#elif defined OP_SQRT #elif defined OP_SQRT
#define PROCESS_ELEM dstelem = sqrt(srcelem1) #define PROCESS_ELEM storedst(sqrt(srcelem1))
#elif defined OP_LOG #elif defined OP_LOG
#define PROCESS_ELEM \ #define PROCESS_ELEM \
dstT v = (dstT)(srcelem1);\ dstT v = (dstT)(srcelem1);\
dstelem = v > (dstT)(0) ? log(v) : log(-v) storedst(v > (dstT)(0) ? log(v) : log(-v))
#elif defined OP_CMP #elif defined OP_CMP
#define dstT uchar #define dstT uchar
#define srcT2 srcT1 #define srcT2 srcT1
#define convertToWT1 #define convertToWT1
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0) #define PROCESS_ELEM storedst(convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0))
#elif defined OP_CONVERT_SCALE_ABS #elif defined OP_CONVERT_SCALE_ABS
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha, workT beta #define EXTRA_PARAMS , workT alpha, workT beta
#define PROCESS_ELEM \ #define PROCESS_ELEM \
workT value = srcelem1 * alpha + beta; \ workT value = srcelem1 * alpha + beta; \
dstelem = convertToDT(value >= 0 ? value : -value) storedst(convertToDT(value >= 0 ? value : -value))
#elif defined OP_SCALE_ADD #elif defined OP_SCALE_ADD
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha #define EXTRA_PARAMS , workT alpha
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * alpha + srcelem2) #define PROCESS_ELEM storedst(convertToDT(srcelem1 * alpha + srcelem2))
#elif defined OP_CTP_AD || defined OP_CTP_AR #elif defined OP_CTP_AD || defined OP_CTP_AR
#ifdef OP_CTP_AD #ifdef OP_CTP_AD
@ -257,8 +298,8 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \ dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \
dstT cartToPolar = y2 <= x2 ? x * y / (x2 + 0.28f * y2 + CV_EPSILON) + tmp : (tmp1 - x * y / (y2 + 0.28f * x2 + CV_EPSILON)); \ dstT cartToPolar = y2 <= x2 ? x * y / (x2 + 0.28f * y2 + CV_EPSILON) + tmp : (tmp1 - x * y / (y2 + 0.28f * x2 + CV_EPSILON)); \
TO_DEGREE \ TO_DEGREE \
dstelem = magnitude; \ storedst(magnitude); \
dstelem2 = cartToPolar storedst2(cartToPolar)
#elif defined OP_PTC_AD || defined OP_PTC_AR #elif defined OP_PTC_AD || defined OP_PTC_AR
#ifdef OP_PTC_AD #ifdef OP_PTC_AD
@ -272,15 +313,15 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#define PROCESS_ELEM \ #define PROCESS_ELEM \
dstT x = srcelem1, y = srcelem2; \ dstT x = srcelem1, y = srcelem2; \
FROM_DEGREE; \ FROM_DEGREE; \
dstelem = cos(alpha) * x; \ storedst(cos(alpha) * x); \
dstelem2 = sin(alpha) * x storedst2(sin(alpha) * x)
#elif defined OP_PATCH_NANS #elif defined OP_PATCH_NANS
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , int val #define EXTRA_PARAMS , int val
#define PROCESS_ELEM \ #define PROCESS_ELEM \
if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \ if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \
dstelem = val storedst(val)
#else #else
#error "unknown op type" #error "unknown op type"
@ -290,18 +331,26 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2 #define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
#undef EXTRA_INDEX #undef EXTRA_INDEX
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, x*(int)sizeof(dstT) + dstoffset2) #define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, x*(int)sizeof(dstT_C1)*cn + dstoffset2)
#endif #endif
#if defined UNARY_OP || defined MASK_UNARY_OP #if defined UNARY_OP || defined MASK_UNARY_OP
#undef srcelem2
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \ #if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \ defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \ defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
defined OP_MUL || defined OP_DIV || defined OP_POWN defined OP_MUL || defined OP_DIV || defined OP_POWN
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT srcelem2 #define EXTRA_PARAMS , workST srcelem2_
#undef srcelem2
#define srcelem2 srcelem2_
#endif #endif
#if cn == 3
#undef srcelem2
#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
#endif
#endif #endif
#if defined BINARY_OP #if defined BINARY_OP
@ -316,11 +365,11 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
if (x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1);
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT)) #if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2) + srcoffset2); int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2_C1)*cn + srcoffset2);
#endif #endif
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset);
EXTRA_INDEX; EXTRA_INDEX;
PROCESS_ELEM; PROCESS_ELEM;
@ -343,9 +392,9 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
int mask_index = mad24(y, maskstep, x + maskoffset); int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] ) if( mask[mask_index] )
{ {
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1);
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2) + srcoffset2); int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2_C1)*cn + srcoffset2);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset);
PROCESS_ELEM; PROCESS_ELEM;
} }
@ -363,9 +412,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
if (x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset);
EXTRA_INDEX;
PROCESS_ELEM; PROCESS_ELEM;
} }
@ -386,8 +434,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
int mask_index = mad24(y, maskstep, x + maskoffset); int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] ) if( mask[mask_index] )
{ {
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1); int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset);
PROCESS_ELEM; PROCESS_ELEM;
} }

Loading…
Cancel
Save