diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index c5e561e26e..9de474b402 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1643,7 +1643,7 @@ static bool ocl_inRange( InputArray _src, InputArray _lowerb, if (kercn % cn != 0) kercn = cn; int colsPerWI = kercn / cn; - String opts = format("%s-D cn=%d -D srcT=%s -D srcT1=%s -D dstT=%s -D kercn=%d -D depth=%d%s -D colsPerWI=%d", + String opts = format("%s-D CN=%d -D SRC_T=%s -D SRC_T1=%s -D DST_T=%s -D KERCN=%d -D DEPTH=%d%s -D COLS_PER_WI=%d", haveScalar ? "-D HAVE_SCALAR " : "", cn, ocl::typeToStr(CV_MAKE_TYPE(sdepth, kercn)), ocl::typeToStr(sdepth), ocl::typeToStr(CV_8UC(colsPerWI)), kercn, sdepth, doubleSupport ? " -D DOUBLE_SUPPORT" : "", colsPerWI); diff --git a/modules/core/src/opencl/inrange.cl b/modules/core/src/opencl/inrange.cl index 538259539a..a5efd38bcf 100644 --- a/modules/core/src/opencl/inrange.cl +++ b/modules/core/src/opencl/inrange.cl @@ -52,7 +52,7 @@ __kernel void inrange(__global const uchar * src1ptr, int src1_step, int src1_offset, __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, #ifdef HAVE_SCALAR - __global const srcT1 * src2, __global const srcT1 * src3, + __global const SRC_T1 * src2, __global const SRC_T1 * src3, #else __global const uchar * src2ptr, int src2_step, int src2_offset, __global const uchar * src3ptr, int src3_step, int src3_offset, @@ -64,56 +64,56 @@ __kernel void inrange(__global const uchar * src1ptr, int src1_step, int src1_of if (x < dst_cols) { - int src1_index = mad24(y0, src1_step, mad24(x, (int)sizeof(srcT1) * kercn, src1_offset)); - int dst_index = mad24(y0, dst_step, mad24(x, colsPerWI, dst_offset)); + int src1_index = mad24(y0, src1_step, mad24(x, (int)sizeof(SRC_T1) * KERCN, src1_offset)); + int dst_index = mad24(y0, dst_step, mad24(x, COLS_PER_WI, dst_offset)); #ifndef HAVE_SCALAR - int src2_index = mad24(y0, src2_step, mad24(x, (int)sizeof(srcT1) * kercn, src2_offset)); - 
int src3_index = mad24(y0, src3_step, mad24(x, (int)sizeof(srcT1) * kercn, src3_offset)); + int src2_index = mad24(y0, src2_step, mad24(x, (int)sizeof(SRC_T1) * KERCN, src2_offset)); + int src3_index = mad24(y0, src3_step, mad24(x, (int)sizeof(SRC_T1) * KERCN, src3_offset)); #endif for (int y = y0, y1 = min(dst_rows, y0 + rowsPerWI); y < y1; ++y, src1_index += src1_step, dst_index += dst_step) { -#if kercn >= cn && kercn == 4 && depth <= 4 && !defined HAVE_SCALAR - srcT src1 = *(__global const srcT *)(src1ptr + src1_index); - srcT src2 = *(__global const srcT *)(src2ptr + src2_index); - srcT src3 = *(__global const srcT *)(src3ptr + src3_index); - __global dstT * dst = (__global dstT *)(dstptr + dst_index); -#if cn == 1 - dst[0] = src2 > src1 || src3 < src1 ? (dstT)(0) : (dstT)(255); -#elif cn == 2 - dst[0] = (dstT)(src2.xy > src1.xy || src3.xy < src1.xy || - src2.zw > src1.zw || src3.zw < src1.zw ? (dstT)(0) : (dstT)(255); -#elif cn == 4 - dst[0] = (dstT)(src2.x > src1.x || src3.x < src1.x || +#if KERCN >= CN && KERCN == 4 && DEPTH <= 4 && !defined HAVE_SCALAR + SRC_T src1 = *(__global const SRC_T *)(src1ptr + src1_index); + SRC_T src2 = *(__global const SRC_T *)(src2ptr + src2_index); + SRC_T src3 = *(__global const SRC_T *)(src3ptr + src3_index); + __global DST_T * dst = (__global DST_T *)(dstptr + dst_index); +#if CN == 1 + dst[0] = src2 > src1 || src3 < src1 ? (DST_T)(0) : (DST_T)(255); +#elif CN == 2 + dst[0] = (DST_T)(src2.xy > src1.xy || src3.xy < src1.xy || + src2.zw > src1.zw || src3.zw < src1.zw ? (DST_T)(0) : (DST_T)(255); +#elif CN == 4 + dst[0] = (DST_T)(src2.x > src1.x || src3.x < src1.x || src2.y > src1.y || src3.y < src1.y || src2.z > src1.z || src3.z < src1.z || src2.w > src1.w || src3.w < src1.w ? 
0 : 255); #endif #else - __global const srcT1 * src1 = (__global const srcT1 *)(src1ptr + src1_index); + __global const SRC_T1 * src1 = (__global const SRC_T1 *)(src1ptr + src1_index); __global uchar * dst = dstptr + dst_index; #ifndef HAVE_SCALAR - __global const srcT1 * src2 = (__global const srcT1 *)(src2ptr + src2_index); - __global const srcT1 * src3 = (__global const srcT1 *)(src3ptr + src3_index); + __global const SRC_T1 * src2 = (__global const SRC_T1 *)(src2ptr + src2_index); + __global const SRC_T1 * src3 = (__global const SRC_T1 *)(src3ptr + src3_index); #endif #pragma unroll - for (int px = 0; px < colsPerWI; ++px, src1 += cn + for (int px = 0; px < COLS_PER_WI; ++px, src1 += CN #ifndef HAVE_SCALAR - , src2 += cn, src3 += cn + , src2 += CN, src3 += CN #endif ) { dst[px] = 255; - for (int c = 0; c < cn; ++c) + for (int c = 0; c < CN; ++c) if (src2[c] > src1[c] || src3[c] < src1[c]) { dst[px] = 0; break; } } -#endif // kercn >= cn +#endif // KERCN >= CN #ifndef HAVE_SCALAR src2_index += src2_step; src3_index += src3_step; diff --git a/modules/imgproc/src/color.hpp b/modules/imgproc/src/color.hpp index 7751d823b1..6ebca26a2c 100644 --- a/modules/imgproc/src/color.hpp +++ b/modules/imgproc/src/color.hpp @@ -277,7 +277,7 @@ struct OclHelper int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1; int pxPerWIx = 1; - cv::String baseOptions = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ", + cv::String baseOptions = format("-D SRC_DEPTH=%d -D SCN=%d -D PIX_PER_WI_Y=%d ", src.depth(), src.channels(), pxPerWIy); switch (sizePolicy) diff --git a/modules/imgproc/src/color_hsv.dispatch.cpp b/modules/imgproc/src/color_hsv.dispatch.cpp index f1678f5deb..8639784927 100644 --- a/modules/imgproc/src/color_hsv.dispatch.cpp +++ b/modules/imgproc/src/color_hsv.dispatch.cpp @@ -219,7 +219,7 @@ bool oclCvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, b int hrange = _src.depth() == CV_32F ? 360 : (!full ? 
180 : 255); if(!h.createKernel("HSV2RGB", ocl::imgproc::color_hsv_oclsrc, - format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", dcn, bidx, hrange, 6.f/hrange))) + format("-D DCN=%d -D BIDX=%d -D HRANGE=%d -D HSCALE=%ff", dcn, bidx, hrange, 6.f/hrange))) { return false; } @@ -234,7 +234,7 @@ bool oclCvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, b int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 255); if(!h.createKernel("HLS2RGB", ocl::imgproc::color_hsv_oclsrc, - format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", dcn, bidx, hrange, 6.f/hrange))) + format("-D DCN=%d -D BIDX=%d -D HRANGE=%d -D HSCALE=%ff", dcn, bidx, hrange, 6.f/hrange))) { return false; } @@ -249,7 +249,7 @@ bool oclCvtColorBGR2HLS( InputArray _src, OutputArray _dst, int bidx, bool full float hscale = (_src.depth() == CV_32F ? 360.f : (!full ? 180.f : 256.f))/360.f; if(!h.createKernel("RGB2HLS", ocl::imgproc::color_hsv_oclsrc, - format("-D hscale=%ff -D bidx=%d -D dcn=3", hscale, bidx))) + format("-D HSCALE=%ff -D BIDX=%d -D DCN=3", hscale, bidx))) { return false; } @@ -264,8 +264,8 @@ bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full int hrange = _src.depth() == CV_32F ? 360 : (!full ? 180 : 256); cv::String options = (_src.depth() == CV_8U ? 
- format("-D hrange=%d -D bidx=%d -D dcn=3", hrange, bidx) : - format("-D hscale=%ff -D bidx=%d -D dcn=3", hrange*(1.f/360.f), bidx)); + format("-D HRANGE=%d -D BIDX=%d -D DCN=3", hrange, bidx) : + format("-D HSCALE=%ff -D BIDX=%d -D DCN=3", hrange*(1.f/360.f), bidx)); if(!h.createKernel("RGB2HSV", ocl::imgproc::color_hsv_oclsrc, options)) { diff --git a/modules/imgproc/src/color_lab.cpp b/modules/imgproc/src/color_lab.cpp index 49a69fc336..fdf797808a 100644 --- a/modules/imgproc/src/color_lab.cpp +++ b/modules/imgproc/src/color_lab.cpp @@ -4424,7 +4424,7 @@ bool oclCvtColorBGR2Luv( InputArray _src, OutputArray _dst, int bidx, bool srgb) OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); if(!h.createKernel("BGR2Luv", ocl::imgproc::color_lab_oclsrc, - format("-D dcn=3 -D bidx=%d%s", bidx, srgb ? " -D SRGB" : ""))) + format("-D DCN=3 -D BIDX=%d%s", bidx, srgb ? " -D SRGB" : ""))) { return false; } @@ -4492,7 +4492,7 @@ bool oclCvtColorBGR2Lab( InputArray _src, OutputArray _dst, int bidx, bool srgb OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); if(!h.createKernel("BGR2Lab", ocl::imgproc::color_lab_oclsrc, - format("-D dcn=3 -D bidx=%d%s", bidx, srgb ? " -D SRGB" : ""))) + format("-D DCN=3 -D BIDX=%d%s", bidx, srgb ? " -D SRGB" : ""))) { return false; } @@ -4587,7 +4587,7 @@ bool oclCvtColorLab2BGR(InputArray _src, OutputArray _dst, int dcn, int bidx, bo OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("Lab2BGR", ocl::imgproc::color_lab_oclsrc, - format("-D dcn=%d -D bidx=%d%s", dcn, bidx, srgb ? " -D SRGB" : ""))) + format("-D DCN=%d -D BIDX=%d%s", dcn, bidx, srgb ? " -D SRGB" : ""))) { return false; } @@ -4638,7 +4638,7 @@ bool oclCvtColorLuv2BGR(InputArray _src, OutputArray _dst, int dcn, int bidx, bo OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("Luv2BGR", ocl::imgproc::color_lab_oclsrc, - format("-D dcn=%d -D bidx=%d%s", dcn, bidx, srgb ? 
" -D SRGB" : ""))) + format("-D DCN=%d -D BIDX=%d%s", dcn, bidx, srgb ? " -D SRGB" : ""))) { return false; } @@ -4692,7 +4692,7 @@ bool oclCvtColorBGR2XYZ( InputArray _src, OutputArray _dst, int bidx ) OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); if(!h.createKernel("RGB2XYZ", ocl::imgproc::color_lab_oclsrc, - format("-D dcn=3 -D bidx=%d", bidx))) + format("-D DCN=3 -D BIDX=%d", bidx))) { return false; } @@ -4740,7 +4740,7 @@ bool oclCvtColorXYZ2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx ) OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("XYZ2RGB", ocl::imgproc::color_lab_oclsrc, - format("-D dcn=%d -D bidx=%d", dcn, bidx))) + format("-D DCN=%d -D BIDX=%d", dcn, bidx))) { return false; } diff --git a/modules/imgproc/src/color_rgb.dispatch.cpp b/modules/imgproc/src/color_rgb.dispatch.cpp index ed2961f0fb..efe6c9d6cb 100644 --- a/modules/imgproc/src/color_rgb.dispatch.cpp +++ b/modules/imgproc/src/color_rgb.dispatch.cpp @@ -428,7 +428,7 @@ bool oclCvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool revers OclHelper< Set<3, 4>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("RGB", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=%d -D bidx=0 -D %s", dcn, reverse ? "REVERSE" : "ORDER"))) + format("-D DCN=%d -D BIDX=0 -D %s", dcn, reverse ? 
"REVERSE" : "ORDER"))) { return false; } @@ -441,7 +441,7 @@ bool oclCvtColorBGR25x5( InputArray _src, OutputArray _dst, int bidx, int gbits OclHelper< Set<3, 4>, Set<2>, Set > h(_src, _dst, 2); if(!h.createKernel("RGB2RGB5x5", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, gbits))) + format("-D DCN=2 -D BIDX=%d -D GREENBITS=%d", bidx, gbits))) { return false; } @@ -454,7 +454,7 @@ bool oclCvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, i OclHelper< Set<2>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("RGB5x52RGB", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, gbits))) + format("-D DCN=%d -D BIDX=%d -D GREENBITS=%d", dcn, bidx, gbits))) { return false; } @@ -467,7 +467,7 @@ bool oclCvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits) OclHelper< Set<2>, Set<1>, Set > h(_src, _dst, 1); if(!h.createKernel("BGR5x52Gray", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=1 -D bidx=0 -D greenbits=%d", gbits))) + format("-D DCN=1 -D BIDX=0 -D GREENBITS=%d", gbits))) { return false; } @@ -480,7 +480,7 @@ bool oclCvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits) OclHelper< Set<1>, Set<2>, Set > h(_src, _dst, 2); if(!h.createKernel("Gray2BGR5x5", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=2 -D bidx=0 -D greenbits=%d", gbits))) + format("-D DCN=2 -D BIDX=0 -D GREENBITS=%d", gbits))) { return false; } @@ -494,7 +494,7 @@ bool oclCvtColorBGR2Gray( InputArray _src, OutputArray _dst, int bidx) int stripeSize = 1; if(!h.createKernel("RGB2Gray", ocl::imgproc::color_rgb_oclsrc, - format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d", bidx, stripeSize))) + format("-D DCN=1 -D BIDX=%d -D STRIPE_SIZE=%d", bidx, stripeSize))) { return false; } @@ -507,7 +507,7 @@ bool oclCvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn) { OclHelper< Set<1>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("Gray2RGB", 
ocl::imgproc::color_rgb_oclsrc, - format("-D bidx=0 -D dcn=%d", dcn))) + format("-D BIDX=0 -D DCN=%d", dcn))) { return false; } @@ -520,7 +520,7 @@ bool oclCvtColorRGBA2mRGBA( InputArray _src, OutputArray _dst) OclHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); if(!h.createKernel("RGBA2mRGBA", ocl::imgproc::color_rgb_oclsrc, - "-D dcn=4 -D bidx=3")) + "-D DCN=4 -D BIDX=3")) { return false; } @@ -533,7 +533,7 @@ bool oclCvtColormRGBA2RGBA( InputArray _src, OutputArray _dst) OclHelper< Set<4>, Set<4>, Set > h(_src, _dst, 4); if(!h.createKernel("mRGBA2RGBA", ocl::imgproc::color_rgb_oclsrc, - "-D dcn=4 -D bidx=3")) + "-D DCN=4 -D BIDX=3")) { return false; } diff --git a/modules/imgproc/src/color_yuv.dispatch.cpp b/modules/imgproc/src/color_yuv.dispatch.cpp index 8720908100..73e1aea32d 100644 --- a/modules/imgproc/src/color_yuv.dispatch.cpp +++ b/modules/imgproc/src/color_yuv.dispatch.cpp @@ -232,7 +232,7 @@ bool oclCvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx ) OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("YUV2RGB", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d", dcn, bidx))) + format("-D DCN=%d -D BIDX=%d", dcn, bidx))) { return false; } @@ -245,7 +245,7 @@ bool oclCvtColorBGR2YUV( InputArray _src, OutputArray _dst, int bidx ) OclHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); if(!h.createKernel("RGB2YUV", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=3 -D bidx=%d", bidx))) + format("-D DCN=3 -D BIDX=%d", bidx))) { return false; } @@ -258,7 +258,7 @@ bool oclCvtcolorYCrCb2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx) OclHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); if(!h.createKernel("YCrCb2RGB", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d", dcn, bidx))) + format("-D DCN=%d -D BIDX=%d", dcn, bidx))) { return false; } @@ -271,7 +271,7 @@ bool oclCvtColorBGR2YCrCb( InputArray _src, OutputArray _dst, int bidx) OclHelper< Set<3, 4>, Set<3>, Set > h(_src, 
_dst, 3); if(!h.createKernel("RGB2YCrCb", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=3 -D bidx=%d", bidx))) + format("-D DCN=3 -D BIDX=%d", bidx))) { return false; } @@ -285,7 +285,7 @@ bool oclCvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bool optimized = _src.offset() % 4 == 0 && _src.step() % 4 == 0; if(!h.createKernel("YUV2RGB_422", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx, + format("-D DCN=%d -D BIDX=%d -D UIDX=%d -D YIDX=%d%s", dcn, bidx, uidx, yidx, optimized ? " -D USE_OPTIMIZED_LOAD" : ""))) { return false; @@ -299,7 +299,7 @@ bool oclCvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, int dcn, int OclHelper< Set<3, 4>, Set<2>, Set > h(_src, _dst, dcn); if(!h.createKernel("RGB2YUV_422", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx + format("-D DCN=%d -D BIDX=%d -D UIDX=%d -D YIDX=%d", dcn, bidx, uidx, yidx ))) { return false; @@ -321,7 +321,7 @@ bool oclCvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int OclHelper< Set<1>, Set<3, 4>, Set, FROM_YUV > h(_src, _dst, dcn); if(!h.createKernel("YUV2RGB_NVx", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx))) + format("-D DCN=%d -D BIDX=%d -D UIDX=%d", dcn, bidx, uidx))) { return false; } @@ -334,7 +334,7 @@ bool oclCvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, i OclHelper< Set<1>, Set<3, 4>, Set, FROM_YUV > h(_src, _dst, dcn); if(!h.createKernel("YUV2RGB_YV12_IYUV", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx, + format("-D DCN=%d -D BIDX=%d -D UIDX=%d%s", dcn, bidx, uidx, _src.isContinuous() ? 
" -D SRC_CONT" : ""))) { return false; @@ -348,7 +348,7 @@ bool oclCvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, int bidx, OclHelper< Set<3, 4>, Set<1>, Set, TO_YUV > h(_src, _dst, 1); if(!h.createKernel("RGB2YUV_YV12_IYUV", ocl::imgproc::color_yuv_oclsrc, - format("-D dcn=1 -D bidx=%d -D uidx=%d", bidx, uidx))) + format("-D DCN=1 -D BIDX=%d -D UIDX=%d", bidx, uidx))) { return false; } diff --git a/modules/imgproc/src/deriv.cpp b/modules/imgproc/src/deriv.cpp index 31803036ce..9cec64a9e9 100644 --- a/modules/imgproc/src/deriv.cpp +++ b/modules/imgproc/src/deriv.cpp @@ -583,9 +583,9 @@ static bool ocl_Laplacian5(InputArray _src, OutputArray _dst, "BORDER_REFLECT_101" }; String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s" - " -D convertToWT=%s -D convertToDT=%s" - " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s" - " -D srcT=%s -D dstT=%s -D WT=%s" + " -D CONVERT_TO_WT=%s -D CONVERT_TO_DT=%s" + " -D %s -D SRC_T1=%s -D DST_T1=%s -D WT1=%s" + " -D SRC_T=%s -D DST_T=%s -D WT=%s" " -D CN=%d ", (int)lt2[0], (int)lt2[1], kernelX.cols / 2, ocl::kernelToStr(kernelX, wdepth, "KERNEL_MATRIX_X").c_str(), @@ -627,8 +627,8 @@ static bool ocl_Laplacian5(InputArray _src, OutputArray _dst, char cvt[2][50]; ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc, format("-D ONLY_SUM_CONVERT " - "-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d " - "-D convertToWT=%s -D convertToDT=%s%s", + "-D SRC_T=%s -D WT=%s -D DST_T=%s -D COEFF_T=%s -D WDEPTH=%d " + "-D CONVERT_TO_WT=%s -D CONVERT_TO_DT=%s%s", ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index fc55b0f642..39f983ae7b 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -1354,7 +1354,7 @@ static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, Input static const char * const 
interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" }; static const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101", "BORDER_TRANSPARENT" }; - String buildOptions = format("-D %s -D %s -D T=%s -D rowsPerWI=%d", + String buildOptions = format("-D %s -D %s -D T=%s -D ROWS_PER_WI=%d", interMap[interpolation], borderMap[borderType], ocl::typeToStr(type), rowsPerWI); @@ -1363,8 +1363,8 @@ static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, Input char cvt[3][50]; int wdepth = std::max(CV_32F, depth); buildOptions = buildOptions - + format(" -D WT=%s -D convertToT=%s -D convertToWT=%s" - " -D convertToWT2=%s -D WT2=%s", + + format(" -D WT=%s -D CONVERT_TO_T=%s -D CONVERT_TO_WT=%s" + " -D CONVERT_TO_WT2=%s -D WT2=%s", ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::convertTypeStr(wdepth, depth, cn, cvt[0], sizeof(cvt[0])), ocl::convertTypeStr(depth, wdepth, cn, cvt[1], sizeof(cvt[1])), @@ -1373,7 +1373,7 @@ static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, Input } int scalarcn = cn == 3 ? 4 : cn; int sctype = CV_MAKETYPE(depth, scalarcn); - buildOptions += format(" -D T=%s -D T1=%s -D cn=%d -D ST=%s -D depth=%d", + buildOptions += format(" -D T=%s -D T1=%s -D CN=%d -D ST=%s -D SRC_DEPTH=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn, ocl::typeToStr(sctype), depth); @@ -2494,7 +2494,7 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0, String opts; if (interpolation == INTER_NEAREST) { - opts = format("-D INTER_NEAREST -D T=%s%s -D CT=%s -D T1=%s -D ST=%s -D cn=%d -D rowsPerWI=%d", + opts = format("-D INTER_NEAREST -D T=%s%s -D CT=%s -D T1=%s -D ST=%s -D CN=%d -D ROWS_PER_WI=%d", ocl::typeToStr(type), doubleSupport ? " -D DOUBLE_SUPPORT" : "", useDouble ? 
"double" : "float", @@ -2504,8 +2504,8 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0, else { char cvt[2][50]; - opts = format("-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D depth=%d" - " -D convertToWT=%s -D convertToT=%s%s -D CT=%s -D cn=%d -D rowsPerWI=%d", + opts = format("-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D SRC_DEPTH=%d" + " -D CONVERT_TO_WT=%s -D CONVERT_TO_T=%s%s -D CT=%s -D CN=%d -D ROWS_PER_WI=%d", interpolationMap[interpolation], ocl::typeToStr(type), ocl::typeToStr(CV_MAT_DEPTH(type)), ocl::typeToStr(sctype), diff --git a/modules/imgproc/src/opencl/color_hsv.cl b/modules/imgproc/src/opencl/color_hsv.cl index eb883bdc96..8eec8edd1c 100644 --- a/modules/imgproc/src/opencl/color_hsv.cl +++ b/modules/imgproc/src/opencl/color_hsv.cl @@ -46,21 +46,21 @@ /**************************************PUBLICFUNC*************************************/ -#if depth == 0 +#if SRC_DEPTH == 0 #define DATA_TYPE uchar #define MAX_NUM 255 #define HALF_MAX_NUM 128 #define COEFF_TYPE int #define SAT_CAST(num) convert_uchar_sat(num) #define DEPTH_0 -#elif depth == 2 +#elif SRC_DEPTH == 2 #define DATA_TYPE ushort #define MAX_NUM 65535 #define HALF_MAX_NUM 32768 #define COEFF_TYPE int #define SAT_CAST(num) convert_ushort_sat(num) #define DEPTH_2 -#elif depth == 5 +#elif SRC_DEPTH == 5 #define DATA_TYPE float #define MAX_NUM 1.0f #define HALF_MAX_NUM 0.5f @@ -78,18 +78,18 @@ enum hsv_shift = 12 }; -#define scnbytes ((int)sizeof(DATA_TYPE)*scn) -#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn) +#define scnbytes ((int)sizeof(DATA_TYPE)*SCN) +#define dcnbytes ((int)sizeof(DATA_TYPE)*DCN) -#ifndef hscale -#define hscale 0 +#ifndef HSCALE +#define HSCALE 0 #endif -#ifndef hrange -#define hrange 0 +#ifndef HRANGE +#define HRANGE 0 #endif -#if bidx == 0 +#if BIDX == 0 #define R_COMP z #define G_COMP y #define B_COMP x @@ -148,7 +148,7 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset, h = (vr & (g - b)) + (~vr & 
((vg & mad24(diff, 2, b - r)) + ((~vg) & mad24(4, diff, r - g)))); h = mad24(h, hdiv_table[diff], (1 << (hsv_shift-1))) >> hsv_shift; - h += h < 0 ? hrange : 0; + h += h < 0 ? HRANGE : 0; dst[dst_index] = convert_uchar_sat_rte(h); dst[dst_index + 1] = (uchar)s; @@ -188,7 +188,7 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset, { float tab[4]; int sector; - h *= hscale; + h *= HSCALE; if( h < 0 ) do h += 6; while( h < 0 ); else if( h >= 6 ) @@ -213,10 +213,10 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset, else b = g = r = v; - dst[dst_index + bidx] = convert_uchar_sat_rte(b*255.f); + dst[dst_index + BIDX] = convert_uchar_sat_rte(b*255.f); dst[dst_index + 1] = convert_uchar_sat_rte(g*255.f); - dst[dst_index + (bidx^2)] = convert_uchar_sat_rte(r*255.f); -#if dcn == 4 + dst[dst_index + (BIDX^2)] = convert_uchar_sat_rte(r*255.f); +#if DCN == 4 dst[dst_index + 3] = MAX_NUM; #endif @@ -275,7 +275,7 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset if( h < 0 ) h += 360.f; - dst[0] = h*hscale; + dst[0] = h*HSCALE; dst[1] = s; dst[2] = v; @@ -316,7 +316,7 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset { float tab[4]; int sector; - h *= hscale; + h *= HSCALE; if(h < 0) do h += 6; while (h < 0); else if (h >= 6) @@ -341,10 +341,10 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset else b = g = r = v; - dst[bidx] = b; + dst[BIDX] = b; dst[1] = g; - dst[bidx^2] = r; -#if dcn == 4 + dst[BIDX^2] = r; +#if DCN == 4 dst[3] = MAX_NUM; #endif @@ -410,7 +410,7 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset, h += 360.f; } - dst[dst_index] = convert_uchar_sat_rte(h*hscale); + dst[dst_index] = convert_uchar_sat_rte(h*HSCALE); dst[dst_index + 1] = convert_uchar_sat_rte(l*255.f); dst[dst_index + 2] = convert_uchar_sat_rte(s*255.f); @@ -451,7 +451,7 @@ __kernel void HLS2RGB(__global const 
uchar* src, int src_step, int src_offset, float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s; float p1 = 2*l - p2; - h *= hscale; + h *= HSCALE; if( h < 0 ) do h += 6; while( h < 0 ); else if( h >= 6 ) @@ -472,10 +472,10 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset, else b = g = r = l; - dst[dst_index + bidx] = convert_uchar_sat_rte(b*255.f); + dst[dst_index + BIDX] = convert_uchar_sat_rte(b*255.f); dst[dst_index + 1] = convert_uchar_sat_rte(g*255.f); - dst[dst_index + (bidx^2)] = convert_uchar_sat_rte(r*255.f); -#if dcn == 4 + dst[dst_index + (BIDX^2)] = convert_uchar_sat_rte(r*255.f); +#if DCN == 4 dst[dst_index + 3] = MAX_NUM; #endif @@ -538,7 +538,7 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset if( h < 0.f ) h += 360.f; } - dst[0] = h*hscale; + dst[0] = h*HSCALE; dst[1] = l; dst[2] = s; @@ -582,7 +582,7 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s; float p1 = 2*l - p2; - h *= hscale; + h *= HSCALE; if( h < 0 ) do h += 6; while( h < 0 ); else if( h >= 6 ) @@ -603,10 +603,10 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset else b = g = r = l; - dst[bidx] = b; + dst[BIDX] = b; dst[1] = g; - dst[bidx^2] = r; -#if dcn == 4 + dst[BIDX^2] = r; +#if DCN == 4 dst[3] = MAX_NUM; #endif diff --git a/modules/imgproc/src/opencl/color_lab.cl b/modules/imgproc/src/opencl/color_lab.cl index 16a96d25e7..1be74a2466 100644 --- a/modules/imgproc/src/opencl/color_lab.cl +++ b/modules/imgproc/src/opencl/color_lab.cl @@ -44,21 +44,21 @@ // //M*/ -#if depth == 0 +#if SRC_DEPTH == 0 #define DATA_TYPE uchar #define MAX_NUM 255 #define HALF_MAX_NUM 128 #define COEFF_TYPE int #define SAT_CAST(num) convert_uchar_sat(num) #define DEPTH_0 -#elif depth == 2 +#elif SRC_DEPTH == 2 #define DATA_TYPE ushort #define MAX_NUM 65535 #define HALF_MAX_NUM 32768 #define COEFF_TYPE int #define SAT_CAST(num) 
convert_ushort_sat(num) #define DEPTH_2 -#elif depth == 5 +#elif SRC_DEPTH == 5 #define DATA_TYPE float #define MAX_NUM 1.0f #define HALF_MAX_NUM 0.5f @@ -76,8 +76,8 @@ enum xyz_shift = 12, }; -#define scnbytes ((int)sizeof(DATA_TYPE)*scn) -#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn) +#define scnbytes ((int)sizeof(DATA_TYPE)*SCN) +#define dcnbytes ((int)sizeof(DATA_TYPE)*DCN) #define __CAT(x, y) x##y #define CAT(x, y) __CAT(x, y) @@ -167,11 +167,11 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse DATA_TYPE dst0 = SAT_CAST(b); DATA_TYPE dst1 = SAT_CAST(g); DATA_TYPE dst2 = SAT_CAST(r); -#if dcn == 3 || defined DEPTH_5 +#if DCN == 3 || defined DEPTH_5 dst[0] = dst0; dst[1] = dst1; dst[2] = dst2; -#if dcn == 4 +#if DCN == 4 dst[3] = MAX_NUM; #endif #else @@ -403,7 +403,7 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset, #endif coeffs, lThresh, fThresh); -#if dcn == 3 +#if DCN == 3 dst_ptr[0] = SAT_CAST(dstbuf[0] * 255.0f); dst_ptr[1] = SAT_CAST(dstbuf[1] * 255.0f); dst_ptr[2] = SAT_CAST(dstbuf[2] * 255.0f); @@ -455,7 +455,7 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse coeffs, lThresh, fThresh); dst[0] = dstbuf[0], dst[1] = dstbuf[1], dst[2] = dstbuf[2]; -#if dcn == 4 +#if DCN == 4 dst[3] = MAX_NUM; #endif ++y; @@ -644,7 +644,7 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse dst[0] = R; dst[1] = G; dst[2] = B; -#if dcn == 4 +#if DCN == 4 dst[3] = MAX_NUM; #endif ++y; @@ -717,7 +717,7 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset, uchar dst1 = SAT_CAST(G * 255.0f); uchar dst2 = SAT_CAST(B * 255.0f); -#if dcn == 4 +#if DCN == 4 *(__global uchar4 *)dst = (uchar4)(dst0, dst1, dst2, MAX_NUM); #else dst[0] = dst0; diff --git a/modules/imgproc/src/opencl/color_rgb.cl b/modules/imgproc/src/opencl/color_rgb.cl index dd4563e111..d5cffe6dcd 100644 --- a/modules/imgproc/src/opencl/color_rgb.cl +++ 
b/modules/imgproc/src/opencl/color_rgb.cl @@ -46,21 +46,21 @@ /**************************************PUBLICFUNC*************************************/ -#if depth == 0 +#if SRC_DEPTH == 0 #define DATA_TYPE uchar #define MAX_NUM 255 #define HALF_MAX_NUM 128 #define COEFF_TYPE int #define SAT_CAST(num) convert_uchar_sat(num) #define DEPTH_0 -#elif depth == 2 +#elif SRC_DEPTH == 2 #define DATA_TYPE ushort #define MAX_NUM 65535 #define HALF_MAX_NUM 32768 #define COEFF_TYPE int #define SAT_CAST(num) convert_ushort_sat(num) #define DEPTH_2 -#elif depth == 5 +#elif SRC_DEPTH == 5 #define DATA_TYPE float #define MAX_NUM 1.0f #define HALF_MAX_NUM 0.5f @@ -86,10 +86,10 @@ enum #define G2YF 0.587f #define R2YF 0.299f -#define scnbytes ((int)sizeof(DATA_TYPE)*scn) -#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn) +#define scnbytes ((int)sizeof(DATA_TYPE)*SCN) +#define dcnbytes ((int)sizeof(DATA_TYPE)*DCN) -#if bidx == 0 +#if BIDX == 0 #define R_COMP z #define G_COMP y #define B_COMP x @@ -160,9 +160,9 @@ __kernel void Gray2RGB(__global const uchar * srcptr, int src_step, int src_offs __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + src_index); __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index); DATA_TYPE val = src[0]; -#if dcn == 3 || defined DEPTH_5 +#if DCN == 3 || defined DEPTH_5 dst[0] = dst[1] = dst[2] = val; -#if dcn == 4 +#if DCN == 4 dst[3] = MAX_NUM; #endif #else @@ -197,7 +197,7 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset, { __global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index); __global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index); -#if scn == 3 +#if SCN == 3 DATA_TYPE_3 src_pix = vload3(0, src); #else DATA_TYPE_4 src_pix = vload4(0, src); @@ -213,8 +213,8 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset, dst[2] = src_pix.z; #endif -#if dcn == 4 -#if scn == 3 +#if DCN == 4 +#if SCN == 3 dst[3] = MAX_NUM; #else dst[3] = 
src[3]; @@ -250,18 +250,18 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset { ushort t = *((__global const ushort*)(src + src_index)); -#if greenbits == 6 - dst[dst_index + bidx] = (uchar)(t << 3); +#if GREENBITS == 6 + dst[dst_index + BIDX] = (uchar)(t << 3); dst[dst_index + 1] = (uchar)((t >> 3) & ~3); - dst[dst_index + (bidx^2)] = (uchar)((t >> 8) & ~7); + dst[dst_index + (BIDX^2)] = (uchar)((t >> 8) & ~7); #else - dst[dst_index + bidx] = (uchar)(t << 3); + dst[dst_index + BIDX] = (uchar)(t << 3); dst[dst_index + 1] = (uchar)((t >> 2) & ~7); - dst[dst_index + (bidx^2)] = (uchar)((t >> 7) & ~7); + dst[dst_index + (BIDX^2)] = (uchar)((t >> 7) & ~7); #endif -#if dcn == 4 -#if greenbits == 6 +#if DCN == 4 +#if GREENBITS == 6 dst[dst_index + 3] = 255; #else dst[dst_index + 3] = t & 0x8000 ? 255 : 0; @@ -295,9 +295,9 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset { uchar4 src_pix = vload4(0, src + src_index); -#if greenbits == 6 +#if GREENBITS == 6 *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~3) << 3)|((src_pix.R_COMP&~7) << 8)); -#elif scn == 3 +#elif SCN == 3 *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)|((src_pix.R_COMP&~7) << 7)); #else *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)| @@ -333,7 +333,7 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse { int t = *((__global const ushort*)(src + src_index)); -#if greenbits == 6 +#if GREENBITS == 6 dst[dst_index] = (uchar)CV_DESCALE(mad24((t << 3) & 0xf8, BY15, mad24((t >> 3) & 0xfc, GY15, ((t >> 8) & 0xf8) * RY15)), gray_shift); #else dst[dst_index] = (uchar)CV_DESCALE(mad24((t << 3) & 0xf8, BY15, mad24((t >> 2) & 0xf8, GY15, ((t >> 7) & 0xf8) * RY15)), gray_shift); @@ -365,7 +365,7 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse { 
int t = src[src_index]; -#if greenbits == 6 +#if GREENBITS == 6 *((__global ushort*)(dst + dst_index)) = (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8)); #else t >>= 3; diff --git a/modules/imgproc/src/opencl/color_yuv.cl b/modules/imgproc/src/opencl/color_yuv.cl index c536f87a0b..ebef91743f 100644 --- a/modules/imgproc/src/opencl/color_yuv.cl +++ b/modules/imgproc/src/opencl/color_yuv.cl @@ -46,21 +46,21 @@ /**************************************PUBLICFUNC*************************************/ -#if depth == 0 +#if SRC_DEPTH == 0 #define DATA_TYPE uchar #define MAX_NUM 255 #define HALF_MAX_NUM 128 #define COEFF_TYPE int #define SAT_CAST(num) convert_uchar_sat(num) #define DEPTH_0 -#elif depth == 2 +#elif SRC_DEPTH == 2 #define DATA_TYPE ushort #define MAX_NUM 65535 #define HALF_MAX_NUM 32768 #define COEFF_TYPE int #define SAT_CAST(num) convert_ushort_sat(num) #define DEPTH_2 -#elif depth == 5 +#elif SRC_DEPTH == 5 #define DATA_TYPE float #define MAX_NUM 1.0f #define HALF_MAX_NUM 0.5f @@ -114,10 +114,10 @@ enum #define CR2GI -11698 #define CB2BI 29049 -#define scnbytes ((int)sizeof(DATA_TYPE)*scn) -#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn) +#define scnbytes ((int)sizeof(DATA_TYPE)*SCN) +#define dcnbytes ((int)sizeof(DATA_TYPE)*DCN) -#if bidx == 0 +#if BIDX == 0 #define R_COMP z #define G_COMP y #define B_COMP x @@ -127,12 +127,12 @@ enum #define B_COMP z #endif -#ifndef uidx -#define uidx 0 +#ifndef UIDX +#define UIDX 0 #endif -#ifndef yidx -#define yidx 0 +#ifndef YIDX +#define YIDX 0 #endif #ifndef PIX_PER_WI_X @@ -234,10 +234,10 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset const int b = Y + CV_DESCALE(mul24(U - HALF_MAX_NUM, coeffs[0]), yuv_shift); #endif - dst[bidx] = SAT_CAST( b ); + dst[BIDX] = SAT_CAST( b ); dst[1] = SAT_CAST( g ); - dst[bidx^2] = SAT_CAST( r ); -#if dcn == 4 + dst[BIDX^2] = SAT_CAST( r ); +#if DCN == 4 dst[3] = MAX_NUM; #endif ++y; @@ -266,7 +266,7 @@ __kernel void YUV2RGB_NVx(__global const 
uchar* srcptr, int src_step, int src_of { __global const uchar* ysrc = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset); __global const uchar* usrc = srcptr + mad24(rows + y, src_step, (x << 1) + src_offset); - __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, mad24(x, dcn<<1, dt_offset)); + __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, mad24(x, DCN<<1, dt_offset)); __global uchar* dst2 = dst1 + dst_step; float Y1 = ysrc[0]; @@ -274,8 +274,8 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of float Y3 = ysrc[src_step]; float Y4 = ysrc[src_step + 1]; - float U = ((float)usrc[uidx]) - HALF_MAX_NUM; - float V = ((float)usrc[1-uidx]) - HALF_MAX_NUM; + float U = ((float)usrc[UIDX]) - HALF_MAX_NUM; + float V = ((float)usrc[1-UIDX]) - HALF_MAX_NUM; __constant float* coeffs = c_YUV2RGBCoeffs_420; float ruv = fma(coeffs[4], V, 0.5f); @@ -283,34 +283,34 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of float buv = fma(coeffs[1], U, 0.5f); Y1 = max(0.f, Y1 - 16.f) * coeffs[0]; - dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv); + dst1[2 - BIDX] = convert_uchar_sat(Y1 + ruv); dst1[1] = convert_uchar_sat(Y1 + guv); - dst1[bidx] = convert_uchar_sat(Y1 + buv); -#if dcn == 4 + dst1[BIDX] = convert_uchar_sat(Y1 + buv); +#if DCN == 4 dst1[3] = 255; #endif Y2 = max(0.f, Y2 - 16.f) * coeffs[0]; - dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv); - dst1[dcn + 1] = convert_uchar_sat(Y2 + guv); - dst1[dcn + bidx] = convert_uchar_sat(Y2 + buv); -#if dcn == 4 + dst1[DCN + 2 - BIDX] = convert_uchar_sat(Y2 + ruv); + dst1[DCN + 1] = convert_uchar_sat(Y2 + guv); + dst1[DCN + BIDX] = convert_uchar_sat(Y2 + buv); +#if DCN == 4 dst1[7] = 255; #endif Y3 = max(0.f, Y3 - 16.f) * coeffs[0]; - dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv); + dst2[2 - BIDX] = convert_uchar_sat(Y3 + ruv); dst2[1] = convert_uchar_sat(Y3 + guv); - dst2[bidx] = convert_uchar_sat(Y3 + buv); -#if dcn == 4 + dst2[BIDX] = convert_uchar_sat(Y3 
+ buv); +#if DCN == 4 dst2[3] = 255; #endif Y4 = max(0.f, Y4 - 16.f) * coeffs[0]; - dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv); - dst2[dcn + 1] = convert_uchar_sat(Y4 + guv); - dst2[dcn + bidx] = convert_uchar_sat(Y4 + buv); -#if dcn == 4 + dst2[DCN + 2 - BIDX] = convert_uchar_sat(Y4 + ruv); + dst2[DCN + 1] = convert_uchar_sat(Y4 + guv); + dst2[DCN + BIDX] = convert_uchar_sat(Y4 + buv); +#if DCN == 4 dst2[7] = 255; #endif } @@ -319,7 +319,7 @@ __kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_of } } -#if uidx < 2 +#if UIDX < 2 __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset, __global uchar* dstptr, int dst_step, int dt_offset, @@ -336,7 +336,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int if (y < rows / 2 ) { __global const uchar* ysrc = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset); - __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset); + __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (DCN<<1) + dt_offset); __global uchar* dst2 = dst1 + dst_step; float Y1 = ysrc[0]; @@ -354,8 +354,8 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int __global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? 
vsteps[y%2] : 0); float uv[2] = { ((float)usrc[0]) - HALF_MAX_NUM, ((float)vsrc[0]) - HALF_MAX_NUM }; #endif - float U = uv[uidx]; - float V = uv[1-uidx]; + float U = uv[UIDX]; + float V = uv[1-UIDX]; __constant float* coeffs = c_YUV2RGBCoeffs_420; float ruv = fma(coeffs[4], V, 0.5f); @@ -363,34 +363,34 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int float buv = fma(coeffs[1], U, 0.5f); Y1 = max(0.f, Y1 - 16.f) * coeffs[0]; - dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv); + dst1[2 - BIDX] = convert_uchar_sat(Y1 + ruv); dst1[1] = convert_uchar_sat(Y1 + guv); - dst1[bidx] = convert_uchar_sat(Y1 + buv); -#if dcn == 4 + dst1[BIDX] = convert_uchar_sat(Y1 + buv); +#if DCN == 4 dst1[3] = 255; #endif Y2 = max(0.f, Y2 - 16.f) * coeffs[0]; - dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv); - dst1[dcn + 1] = convert_uchar_sat(Y2 + guv); - dst1[dcn + bidx] = convert_uchar_sat(Y2 + buv); -#if dcn == 4 + dst1[DCN + 2 - BIDX] = convert_uchar_sat(Y2 + ruv); + dst1[DCN + 1] = convert_uchar_sat(Y2 + guv); + dst1[DCN + BIDX] = convert_uchar_sat(Y2 + buv); +#if DCN == 4 dst1[7] = 255; #endif Y3 = max(0.f, Y3 - 16.f) * coeffs[0]; - dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv); + dst2[2 - BIDX] = convert_uchar_sat(Y3 + ruv); dst2[1] = convert_uchar_sat(Y3 + guv); - dst2[bidx] = convert_uchar_sat(Y3 + buv); -#if dcn == 4 + dst2[BIDX] = convert_uchar_sat(Y3 + buv); +#if DCN == 4 dst2[3] = 255; #endif Y4 = max(0.f, Y4 - 16.f) * coeffs[0]; - dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv); - dst2[dcn + 1] = convert_uchar_sat(Y4 + guv); - dst2[dcn + bidx] = convert_uchar_sat(Y4 + buv); -#if dcn == 4 + dst2[DCN + 2 - BIDX] = convert_uchar_sat(Y4 + ruv); + dst2[DCN + 1] = convert_uchar_sat(Y4 + guv); + dst2[DCN + BIDX] = convert_uchar_sat(Y4 + buv); +#if DCN == 4 dst2[7] = 255; #endif } @@ -401,7 +401,7 @@ __kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int #endif -#if uidx < 2 +#if UIDX < 2 __constant float 
c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f, 0.438999176f, -0.3679990768f, -0.0709991455f }; @@ -415,7 +415,7 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int if (x < cols/2) { - int src_index = mad24(y << 1, src_step, mad24(x << 1, scn, src_offset)); + int src_index = mad24(y << 1, src_step, mad24(x << 1, SCN, src_offset)); int ydst_index = mad24(y << 1, dst_step, (x << 1) + dst_offset); int y_rows = rows / 3 * 2; int vsteps[2] = { cols >> 1, dst_step - (cols >> 1)}; @@ -438,56 +438,56 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int int s11 = *((__global const int*) src1); int s12 = *((__global const int*) src1 + 1); int s13 = *((__global const int*) src1 + 2); -#if scn == 4 +#if SCN == 4 int s14 = *((__global const int*) src1 + 3); #endif int s21 = *((__global const int*) src2); int s22 = *((__global const int*) src2 + 1); int s23 = *((__global const int*) src2 + 2); -#if scn == 4 +#if SCN == 4 int s24 = *((__global const int*) src2 + 3); #endif - float src_pix1[scn * 4], src_pix2[scn * 4]; + float src_pix1[SCN * 4], src_pix2[SCN * 4]; *((float4*) src_pix1) = convert_float4(as_uchar4(s11)); *((float4*) src_pix1 + 1) = convert_float4(as_uchar4(s12)); *((float4*) src_pix1 + 2) = convert_float4(as_uchar4(s13)); -#if scn == 4 +#if SCN == 4 *((float4*) src_pix1 + 3) = convert_float4(as_uchar4(s14)); #endif *((float4*) src_pix2) = convert_float4(as_uchar4(s21)); *((float4*) src_pix2 + 1) = convert_float4(as_uchar4(s22)); *((float4*) src_pix2 + 2) = convert_float4(as_uchar4(s23)); -#if scn == 4 +#if SCN == 4 *((float4*) src_pix2 + 3) = convert_float4(as_uchar4(s24)); #endif uchar4 y1, y2; - y1.x = convert_uchar_sat(fma(coeffs[0], src_pix1[ 2-bidx], fma(coeffs[1], src_pix1[ 1], fma(coeffs[2], src_pix1[ bidx], 16.5f)))); - y1.y = convert_uchar_sat(fma(coeffs[0], src_pix1[ scn+2-bidx], fma(coeffs[1], src_pix1[ scn+1], fma(coeffs[2], src_pix1[ 
scn+bidx], 16.5f)))); - y1.z = convert_uchar_sat(fma(coeffs[0], src_pix1[2*scn+2-bidx], fma(coeffs[1], src_pix1[2*scn+1], fma(coeffs[2], src_pix1[2*scn+bidx], 16.5f)))); - y1.w = convert_uchar_sat(fma(coeffs[0], src_pix1[3*scn+2-bidx], fma(coeffs[1], src_pix1[3*scn+1], fma(coeffs[2], src_pix1[3*scn+bidx], 16.5f)))); - y2.x = convert_uchar_sat(fma(coeffs[0], src_pix2[ 2-bidx], fma(coeffs[1], src_pix2[ 1], fma(coeffs[2], src_pix2[ bidx], 16.5f)))); - y2.y = convert_uchar_sat(fma(coeffs[0], src_pix2[ scn+2-bidx], fma(coeffs[1], src_pix2[ scn+1], fma(coeffs[2], src_pix2[ scn+bidx], 16.5f)))); - y2.z = convert_uchar_sat(fma(coeffs[0], src_pix2[2*scn+2-bidx], fma(coeffs[1], src_pix2[2*scn+1], fma(coeffs[2], src_pix2[2*scn+bidx], 16.5f)))); - y2.w = convert_uchar_sat(fma(coeffs[0], src_pix2[3*scn+2-bidx], fma(coeffs[1], src_pix2[3*scn+1], fma(coeffs[2], src_pix2[3*scn+bidx], 16.5f)))); + y1.x = convert_uchar_sat(fma(coeffs[0], src_pix1[ 2-BIDX], fma(coeffs[1], src_pix1[ 1], fma(coeffs[2], src_pix1[ BIDX], 16.5f)))); + y1.y = convert_uchar_sat(fma(coeffs[0], src_pix1[ SCN+2-BIDX], fma(coeffs[1], src_pix1[ SCN+1], fma(coeffs[2], src_pix1[ SCN+BIDX], 16.5f)))); + y1.z = convert_uchar_sat(fma(coeffs[0], src_pix1[2*SCN+2-BIDX], fma(coeffs[1], src_pix1[2*SCN+1], fma(coeffs[2], src_pix1[2*SCN+BIDX], 16.5f)))); + y1.w = convert_uchar_sat(fma(coeffs[0], src_pix1[3*SCN+2-BIDX], fma(coeffs[1], src_pix1[3*SCN+1], fma(coeffs[2], src_pix1[3*SCN+BIDX], 16.5f)))); + y2.x = convert_uchar_sat(fma(coeffs[0], src_pix2[ 2-BIDX], fma(coeffs[1], src_pix2[ 1], fma(coeffs[2], src_pix2[ BIDX], 16.5f)))); + y2.y = convert_uchar_sat(fma(coeffs[0], src_pix2[ SCN+2-BIDX], fma(coeffs[1], src_pix2[ SCN+1], fma(coeffs[2], src_pix2[ SCN+BIDX], 16.5f)))); + y2.z = convert_uchar_sat(fma(coeffs[0], src_pix2[2*SCN+2-BIDX], fma(coeffs[1], src_pix2[2*SCN+1], fma(coeffs[2], src_pix2[2*SCN+BIDX], 16.5f)))); + y2.w = convert_uchar_sat(fma(coeffs[0], src_pix2[3*SCN+2-BIDX], fma(coeffs[1], src_pix2[3*SCN+1], 
fma(coeffs[2], src_pix2[3*SCN+BIDX], 16.5f)))); *((__global int*) ydst1) = as_int(y1); *((__global int*) ydst2) = as_int(y2); - float uv[4] = { fma(coeffs[3], src_pix1[ 2-bidx], fma(coeffs[4], src_pix1[ 1], fma(coeffs[5], src_pix1[ bidx], 128.5f))), - fma(coeffs[5], src_pix1[ 2-bidx], fma(coeffs[6], src_pix1[ 1], fma(coeffs[7], src_pix1[ bidx], 128.5f))), - fma(coeffs[3], src_pix1[2*scn+2-bidx], fma(coeffs[4], src_pix1[2*scn+1], fma(coeffs[5], src_pix1[2*scn+bidx], 128.5f))), - fma(coeffs[5], src_pix1[2*scn+2-bidx], fma(coeffs[6], src_pix1[2*scn+1], fma(coeffs[7], src_pix1[2*scn+bidx], 128.5f))) }; + float uv[4] = { fma(coeffs[3], src_pix1[ 2-BIDX], fma(coeffs[4], src_pix1[ 1], fma(coeffs[5], src_pix1[ BIDX], 128.5f))), + fma(coeffs[5], src_pix1[ 2-BIDX], fma(coeffs[6], src_pix1[ 1], fma(coeffs[7], src_pix1[ BIDX], 128.5f))), + fma(coeffs[3], src_pix1[2*SCN+2-BIDX], fma(coeffs[4], src_pix1[2*SCN+1], fma(coeffs[5], src_pix1[2*SCN+BIDX], 128.5f))), + fma(coeffs[5], src_pix1[2*SCN+2-BIDX], fma(coeffs[6], src_pix1[2*SCN+1], fma(coeffs[7], src_pix1[2*SCN+BIDX], 128.5f))) }; - udst[0] = convert_uchar_sat(uv[uidx] ); - vdst[0] = convert_uchar_sat(uv[1 - uidx]); - udst[1] = convert_uchar_sat(uv[2 + uidx]); - vdst[1] = convert_uchar_sat(uv[3 - uidx]); + udst[0] = convert_uchar_sat(uv[UIDX] ); + vdst[0] = convert_uchar_sat(uv[1 - UIDX]); + udst[1] = convert_uchar_sat(uv[2 + UIDX]); + vdst[1] = convert_uchar_sat(uv[3 - UIDX]); #else float4 src_pix1 = convert_float4(vload4(0, src1)); - float4 src_pix2 = convert_float4(vload4(0, src1+scn)); + float4 src_pix2 = convert_float4(vload4(0, src1+SCN)); float4 src_pix3 = convert_float4(vload4(0, src2)); - float4 src_pix4 = convert_float4(vload4(0, src2+scn)); + float4 src_pix4 = convert_float4(vload4(0, src2+SCN)); ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f)))); ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], 
src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f)))); @@ -497,8 +497,8 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int float uv[2] = { fma(coeffs[3], src_pix1.R_COMP, fma(coeffs[4], src_pix1.G_COMP, fma(coeffs[5], src_pix1.B_COMP, 128.5f))), fma(coeffs[5], src_pix1.R_COMP, fma(coeffs[6], src_pix1.G_COMP, fma(coeffs[7], src_pix1.B_COMP, 128.5f))) }; - udst[0] = convert_uchar_sat(uv[uidx] ); - vdst[0] = convert_uchar_sat(uv[1-uidx]); + udst[0] = convert_uchar_sat(uv[UIDX] ); + vdst[0] = convert_uchar_sat(uv[1-UIDX]); #endif ++y; src_index += 2*src_step; @@ -520,7 +520,7 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of if (x < cols / 2) { __global const uchar* src = srcptr + mad24(y, src_step, (x << 2) + src_offset); - __global uchar* dst = dstptr + mad24(y, dst_step, mad24(x << 1, dcn, dst_offset)); + __global uchar* dst = dstptr + mad24(y, dst_step, mad24(x << 1, DCN, dst_offset)); #pragma unroll for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) @@ -530,34 +530,34 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of __constant float* coeffs = c_YUV2RGBCoeffs_420; #ifndef USE_OPTIMIZED_LOAD - float U = ((float) src[uidx]) - HALF_MAX_NUM; - float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX_NUM; - float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0]; - float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0]; + float U = ((float) src[UIDX]) - HALF_MAX_NUM; + float V = ((float) src[(2 + UIDX) % 4]) - HALF_MAX_NUM; + float y00 = max(0.f, ((float) src[YIDX]) - 16.f) * coeffs[0]; + float y01 = max(0.f, ((float) src[YIDX + 2]) - 16.f) * coeffs[0]; #else int load_src = *((__global int*) src); float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff}; - float U = vec_src[uidx] - HALF_MAX_NUM; - float V = vec_src[(2 + uidx) % 4] - HALF_MAX_NUM; - float y00 = max(0.f, vec_src[yidx] - 16.f) * coeffs[0]; - float 
y01 = max(0.f, vec_src[yidx + 2] - 16.f) * coeffs[0]; + float U = vec_src[UIDX] - HALF_MAX_NUM; + float V = vec_src[(2 + UIDX) % 4] - HALF_MAX_NUM; + float y00 = max(0.f, vec_src[YIDX] - 16.f) * coeffs[0]; + float y01 = max(0.f, vec_src[YIDX + 2] - 16.f) * coeffs[0]; #endif float ruv = fma(coeffs[4], V, 0.5f); float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f)); float buv = fma(coeffs[1], U, 0.5f); - dst[2 - bidx] = convert_uchar_sat(y00 + ruv); + dst[2 - BIDX] = convert_uchar_sat(y00 + ruv); dst[1] = convert_uchar_sat(y00 + guv); - dst[bidx] = convert_uchar_sat(y00 + buv); -#if dcn == 4 + dst[BIDX] = convert_uchar_sat(y00 + buv); +#if DCN == 4 dst[3] = 255; #endif - dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv); - dst[dcn + 1] = convert_uchar_sat(y01 + guv); - dst[dcn + bidx] = convert_uchar_sat(y01 + buv); -#if dcn == 4 + dst[DCN + 2 - BIDX] = convert_uchar_sat(y01 + ruv); + dst[DCN + 1] = convert_uchar_sat(y01 + guv); + dst[DCN + BIDX] = convert_uchar_sat(y01 + buv); +#if DCN == 4 dst[7] = 255; #endif } @@ -600,7 +600,7 @@ __kernel void RGB2YUV_422(__global const uchar* srcptr, int src_step, int src_of __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index); DATA_TYPE_3 src_pix1 = vload3(0, src); DATA_TYPE b1 = src_pix1.B_COMP, g1 = src_pix1.G_COMP, r1 = src_pix1.R_COMP; - DATA_TYPE_3 src_pix2 = vload3(0, src+scn); + DATA_TYPE_3 src_pix2 = vload3(0, src+SCN); DATA_TYPE b2 = src_pix2.B_COMP, g2 = src_pix2.G_COMP, r2 = src_pix2.R_COMP; @@ -625,10 +625,10 @@ __kernel void RGB2YUV_422(__global const uchar* srcptr, int src_step, int src_of const res_dtype U = MAC_fn(coeffs[5], sr, coeffs[1] + MAC_fn(coeffs[6], sg, mul_fn(coeffs[7], sb))); const res_dtype V = MAC_fn(coeffs[7], sr, coeffs[1] + MAC_fn(coeffs[8], sg, mul_fn(coeffs[9], sb))); - dst[uidx] = output_scale_fn(U); - dst[(2 + uidx) % 4] = output_scale_fn(V); - dst[yidx] = output_scale_fn(Y1); - dst[yidx+2] = output_scale_fn(Y2); + dst[UIDX] = output_scale_fn(U); + dst[(2 + UIDX) % 4] = 
output_scale_fn(V); + dst[YIDX] = output_scale_fn(Y1); + dst[YIDX+2] = output_scale_fn(Y2); ++y; dst_index += dst_step; @@ -728,10 +728,10 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset, int b = yp + CV_DESCALE(coeff[3] * (cb - HALF_MAX_NUM), yuv_shift); #endif - dstptr[(bidx^2)] = SAT_CAST(r); + dstptr[(BIDX^2)] = SAT_CAST(r); dstptr[1] = SAT_CAST(g); - dstptr[bidx] = SAT_CAST(b); -#if dcn == 4 + dstptr[BIDX] = SAT_CAST(b); +#if DCN == 4 dstptr[3] = MAX_NUM; #endif diff --git a/modules/imgproc/src/opencl/laplacian5.cl b/modules/imgproc/src/opencl/laplacian5.cl index 1404a8c51e..d6f7b9e664 100644 --- a/modules/imgproc/src/opencl/laplacian5.cl +++ b/modules/imgproc/src/opencl/laplacian5.cl @@ -13,25 +13,25 @@ __kernel void sumConvert(__global const uchar * src1ptr, int src1_step, int src1_offset, __global const uchar * src2ptr, int src2_step, int src2_offset, __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, - coeffT scale, coeffT delta) + COEFF_T scale, COEFF_T delta) { int x = get_global_id(0); int y = get_global_id(1); if (y < dst_rows && x < dst_cols) { - int src1_index = mad24(y, src1_step, mad24(x, (int)sizeof(srcT), src1_offset)); - int src2_index = mad24(y, src2_step, mad24(x, (int)sizeof(srcT), src2_offset)); - int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(dstT), dst_offset)); + int src1_index = mad24(y, src1_step, mad24(x, (int)sizeof(SRC_T), src1_offset)); + int src2_index = mad24(y, src2_step, mad24(x, (int)sizeof(SRC_T), src2_offset)); + int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(DST_T), dst_offset)); - __global const srcT * src1 = (__global const srcT *)(src1ptr + src1_index); - __global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index); - __global dstT * dst = (__global dstT *)(dstptr + dst_index); + __global const SRC_T * src1 = (__global const SRC_T *)(src1ptr + src1_index); + __global const SRC_T * src2 = (__global const SRC_T *)(src2ptr + 
src2_index); + __global DST_T * dst = (__global DST_T *)(dstptr + dst_index); -#if wdepth <= 4 - dst[0] = convertToDT( mad24((WT)(scale), convertToWT(src1[0]) + convertToWT(src2[0]), (WT)(delta)) ); +#if WDEPTH <= 4 + dst[0] = CONVERT_TO_DT( mad24((WT)(scale), CONVERT_TO_WT(src1[0]) + CONVERT_TO_WT(src2[0]), (WT)(delta)) ); #else - dst[0] = convertToDT( mad((WT)(scale), convertToWT(src1[0]) + convertToWT(src2[0]), (WT)(delta)) ); + dst[0] = CONVERT_TO_DT( mad((WT)(scale), CONVERT_TO_WT(src1[0]) + CONVERT_TO_WT(src2[0]), (WT)(delta)) ); #endif } } @@ -74,18 +74,18 @@ __kernel void sumConvert(__global const uchar * src1ptr, int src1_step, int src1 #endif #if CN != 3 -#define loadpix(addr) *(__global const srcT *)(addr) -#define storepix(val, addr) *(__global dstT *)(addr) = val -#define SRCSIZE (int)sizeof(srcT) -#define DSTSIZE (int)sizeof(dstT) +#define loadpix(addr) *(__global const SRC_T *)(addr) +#define storepix(val, addr) *(__global DST_T *)(addr) = val +#define SRCSIZE (int)sizeof(SRC_T) +#define DSTSIZE (int)sizeof(DST_T) #else -#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr)) -#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr)) -#define SRCSIZE (int)sizeof(srcT1)*3 -#define DSTSIZE (int)sizeof(dstT1)*3 +#define loadpix(addr) vload3(0, (__global const SRC_T1 *)(addr)) +#define storepix(val, addr) vstore3(val, 0, (__global DST_T1 *)(addr)) +#define SRCSIZE (int)sizeof(SRC_T1)*3 +#define DSTSIZE (int)sizeof(DST_T1)*3 #endif -#define SRC(_x,_y) convertToWT(loadpix(Src + mad24(_y, src_step, SRCSIZE * _x))) +#define SRC(_x,_y) CONVERT_TO_WT(loadpix(Src + mad24(_y, src_step, SRCSIZE * _x))) #ifdef BORDER_CONSTANT // CCCCCC|abcdefgh|CCCCCCC @@ -173,7 +173,7 @@ __kernel void laplacian(__global uchar* Src, int src_step, int srcOffsetX, int s } WT sum = mad(scale_v, (sum1 + sum2), delta_v); - storepix(convertToDT(sum), Dst + mad24(y + liy, dst_step, mad24(x, DSTSIZE, dst_offset))); + storepix(CONVERT_TO_DT(sum), Dst + mad24(y + liy, 
dst_step, mad24(x, DSTSIZE, dst_offset))); } for (int i = liy * BLK_X + lix; i < (RADIUS*2) * (BLK_X+(RADIUS*2)); i += BLK_X * BLK_Y) @@ -203,4 +203,4 @@ __kernel void laplacian(__global uchar* Src, int src_step, int srcOffsetX, int s } } -#endif \ No newline at end of file +#endif diff --git a/modules/imgproc/src/opencl/pyr_down.cl b/modules/imgproc/src/opencl/pyr_down.cl index 5d2e7156ef..22b1472b3e 100644 --- a/modules/imgproc/src/opencl/pyr_down.cl +++ b/modules/imgproc/src/opencl/pyr_down.cl @@ -67,7 +67,7 @@ #error No extrapolation method #endif -#if cn != 3 +#if CN != 3 #define loadpix(addr) *(__global const T*)(addr) #define storepix(val, addr) *(__global T*)(addr) = (val) #define PIXSIZE ((int)sizeof(T)) @@ -77,9 +77,9 @@ #define PIXSIZE ((int)sizeof(T1)*3) #endif -#define SRC(_x,_y) convertToFT(loadpix(srcData + mad24(_y, src_step, PIXSIZE * _x))) +#define SRC(_x,_y) CONVERT_TO_FT(loadpix(srcData + mad24(_y, src_step, PIXSIZE * _x))) -#if kercn == 4 +#if KERCN == 4 #define SRC4(_x,_y) convert_float4(vload4(0, srcData + mad24(_y, src_step, PIXSIZE * _x))) #endif @@ -107,7 +107,7 @@ smem[1][col_lcl] = sum1; -#if kercn == 4 +#if KERCN == 4 #define LOAD_LOCAL4(col_gl, col_lcl) \ sum40 = co3* SRC4(col_gl, EXTRAPOLATE_(src_y - 2, src_rows)); \ sum40 = MAD(co2, SRC4(col_gl, EXTRAPOLATE_(src_y - 1, src_rows)), sum40); \ @@ -131,7 +131,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols) { - const int x = get_global_id(0)*kercn; + const int x = get_global_id(0)*KERCN; const int y = 2*get_global_id(1); __local FT smem[2][LOCAL_SIZE + 4]; @@ -150,7 +150,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, { #undef EXTRAPOLATE_ #define EXTRAPOLATE_(val, maxVal) val -#if kercn == 1 +#if KERCN == 1 col = EXTRAPOLATE(x, src_cols); LOAD_LOCAL(col, 2 + get_local_id(0)) #else @@ -183,7 +183,7 @@ __kernel 
void pyrDown(__global const uchar * src, int src_step, int src_offset, { #undef EXTRAPOLATE_ #define EXTRAPOLATE_(val, maxVal) EXTRAPOLATE(val, maxVal) -#if kercn == 1 +#if KERCN == 1 col = EXTRAPOLATE(x, src_cols); LOAD_LOCAL(col, 2 + get_local_id(0)) #else @@ -215,7 +215,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, barrier(CLK_LOCAL_MEM_FENCE); -#if kercn == 1 +#if KERCN == 1 if (get_local_id(0) < LOCAL_SIZE / 2) { const int tid2 = get_local_id(0) * 2; @@ -226,8 +226,8 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, { for (int yin = y, y1 = min(dst_rows, y + 2); yin < y1; yin++) { -#if cn == 1 -#if fdepth <= 5 +#if CN == 1 +#if FDEPTH <= 5 FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2)); #else FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2)); @@ -239,7 +239,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, sum = MAD(co2, smem[yin - y][2 + tid2 + 1], sum); #endif sum = MAD(co3, smem[yin - y][2 + tid2 + 2], sum); - storepix(convertToT(sum), dstData + yin * dst_step + dst_x * PIXSIZE); + storepix(CONVERT_TO_T(sum), dstData + yin * dst_step + dst_x * PIXSIZE); } } } @@ -256,7 +256,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, sum = MAD(co2, smem[yin - y][2 + tid4 - 1], sum); sum = MAD(co1, smem[yin - y][2 + tid4 ], sum); sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum); - storepix(convertToT(sum), dstData + mad24(yin, dst_step, dst_x * PIXSIZE)); + storepix(CONVERT_TO_T(sum), dstData + mad24(yin, dst_step, dst_x * PIXSIZE)); dst_x ++; sum = co3* smem[yin - y][2 + tid4 + 4]; @@ -264,7 +264,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum); sum = MAD(co1, smem[yin - y][2 + tid4 + 2], sum); sum = 
MAD(co2, smem[yin - y][2 + tid4 + 3], sum); - storepix(convertToT(sum), dstData + mad24(yin, dst_step, dst_x * PIXSIZE)); + storepix(CONVERT_TO_T(sum), dstData + mad24(yin, dst_step, dst_x * PIXSIZE)); dst_x --; } @@ -279,7 +279,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, sum = MAD(co1, smem[yin - y][2 + tid4 ], sum); sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum); - storepix(convertToT(sum), dstData + mad24(yin, dst_step, dst_x * PIXSIZE)); + storepix(CONVERT_TO_T(sum), dstData + mad24(yin, dst_step, dst_x * PIXSIZE)); } } #endif diff --git a/modules/imgproc/src/opencl/pyr_up.cl b/modules/imgproc/src/opencl/pyr_up.cl index d033d7ee4e..194be1c830 100644 --- a/modules/imgproc/src/opencl/pyr_up.cl +++ b/modules/imgproc/src/opencl/pyr_up.cl @@ -58,7 +58,7 @@ #endif #endif -#if cn != 3 +#if CN != 3 #define loadpix(addr) *(__global const T*)(addr) #define storepix(val, addr) *(__global T*)(addr) = (val) #define PIXSIZE ((int)sizeof(T)) @@ -92,7 +92,7 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in int srcx = EXTRAPOLATE(mad24((int)get_group_id(0), LOCAL_SIZE/2, tidx) - 1, src_cols); int srcy = EXTRAPOLATE(mad24((int)get_group_id(1), LOCAL_SIZE/2, tidy) - 1, src_rows); - s_srcPatch[tidy][tidx] = convertToFT(loadpix(srcData + srcy * src_step + srcx * PIXSIZE)); + s_srcPatch[tidy][tidx] = CONVERT_TO_FT(loadpix(srcData + srcy * src_step + srcx * PIXSIZE)); } barrier(CLK_LOCAL_MEM_FENCE); @@ -124,7 +124,7 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in sum = mad(coefy2, s_dstPatch[1 + ((tidy + 2) >> 1)][tidx], sum); if ((x < dst_cols) && (y < dst_rows)) - storepix(convertToT(sum), dstData + y * dst_step + x * PIXSIZE); + storepix(CONVERT_TO_T(sum), dstData + y * dst_step + x * PIXSIZE); } @@ -149,10 +149,10 @@ __kernel void pyrUp_unrolled(__global const uchar * src, int src_step, int src_o int srcx2 = EXTRAPOLATE(srcx+1, src_cols); int srcy1 = EXTRAPOLATE(srcy, 
src_rows); int srcy2 = EXTRAPOLATE(srcy+1, src_rows); - s_srcPatch[ly][lx] = convertToFT(loadpix(srcData + srcy1 * src_step + srcx1 * PIXSIZE)); - s_srcPatch[ly+1][lx] = convertToFT(loadpix(srcData + srcy2 * src_step + srcx1 * PIXSIZE)); - s_srcPatch[ly][lx+1] = convertToFT(loadpix(srcData + srcy1 * src_step + srcx2 * PIXSIZE)); - s_srcPatch[ly+1][lx+1] = convertToFT(loadpix(srcData + srcy2 * src_step + srcx2 * PIXSIZE)); + s_srcPatch[ly][lx] = CONVERT_TO_FT(loadpix(srcData + srcy1 * src_step + srcx1 * PIXSIZE)); + s_srcPatch[ly+1][lx] = CONVERT_TO_FT(loadpix(srcData + srcy2 * src_step + srcx1 * PIXSIZE)); + s_srcPatch[ly][lx+1] = CONVERT_TO_FT(loadpix(srcData + srcy1 * src_step + srcx2 * PIXSIZE)); + s_srcPatch[ly+1][lx+1] = CONVERT_TO_FT(loadpix(srcData + srcy2 * src_step + srcx2 * PIXSIZE)); } barrier(CLK_LOCAL_MEM_FENCE); @@ -213,22 +213,22 @@ __kernel void pyrUp_unrolled(__global const uchar * src, int src_step, int src_o sum = co3 * s_dstPatch[1 + get_local_id(1) - 1][lx]; sum = mad(co1, s_dstPatch[1 + get_local_id(1) ][lx], sum); sum = mad(co3, s_dstPatch[1 + get_local_id(1) + 1][lx], sum); - storepix(convertToT(sum), dstData + dst_y * dst_step + dst_x * PIXSIZE); + storepix(CONVERT_TO_T(sum), dstData + dst_y * dst_step + dst_x * PIXSIZE); // (x+1,y) sum = co3 * s_dstPatch[1 + get_local_id(1) - 1][lx+1]; sum = mad(co1, s_dstPatch[1 + get_local_id(1) ][lx+1], sum); sum = mad(co3, s_dstPatch[1 + get_local_id(1) + 1][lx+1], sum); - storepix(convertToT(sum), dstData + dst_y * dst_step + (dst_x+1) * PIXSIZE); + storepix(CONVERT_TO_T(sum), dstData + dst_y * dst_step + (dst_x+1) * PIXSIZE); // (x,y+1) sum = co2 * s_dstPatch[1 + get_local_id(1) ][lx]; sum = mad(co2, s_dstPatch[1 + get_local_id(1) + 1][lx], sum); - storepix(convertToT(sum), dstData + (dst_y+1) * dst_step + dst_x * PIXSIZE); + storepix(CONVERT_TO_T(sum), dstData + (dst_y+1) * dst_step + dst_x * PIXSIZE); // (x+1,y+1) sum = co2 * s_dstPatch[1 + get_local_id(1) ][lx+1]; sum = mad(co2, s_dstPatch[1 + 
get_local_id(1) + 1][lx+1], sum); - storepix(convertToT(sum), dstData + (dst_y+1) * dst_step + (dst_x+1) * PIXSIZE); + storepix(CONVERT_TO_T(sum), dstData + (dst_y+1) * dst_step + (dst_x+1) * PIXSIZE); } } diff --git a/modules/imgproc/src/opencl/remap.cl b/modules/imgproc/src/opencl/remap.cl index 1a30c326b9..8c8b933ab5 100644 --- a/modules/imgproc/src/opencl/remap.cl +++ b/modules/imgproc/src/opencl/remap.cl @@ -53,7 +53,7 @@ #define noconvert -#if cn != 3 +#if CN != 3 #define loadpix(addr) *(__global const T*)(addr) #define storepix(val, addr) *(__global T*)(addr) = val #define TSIZE ((int)sizeof(T)) @@ -73,7 +73,7 @@ enum }; #ifdef INTER_NEAREST -#define convertToWT +#define CONVERT_TO_WT #endif #ifdef BORDER_CONSTANT @@ -82,7 +82,7 @@ enum #define EXTRAPOLATE(v2, v) \ { \ v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), (int2)(0)); \ - v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \ + v = CONVERT_TO_WT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \ } #elif defined BORDER_WRAP #define EXTRAPOLATE(v2, v) \ @@ -96,7 +96,7 @@ enum v2.y -= ((v2.y - src_rows + 1) / src_rows) * src_rows; \ if( v2.y >= src_rows ) \ v2.y %= src_rows; \ - v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \ + v = CONVERT_TO_WT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \ } #elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101) #ifdef BORDER_REFLECT @@ -130,7 +130,7 @@ enum v2.y = src_rows - 1 - (v2.y - src_rows) - delta; \ } \ while (v2.y >= src_rows || v2.y < 0); \ - v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \ + v = CONVERT_TO_WT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \ } #else #error No extrapolation method @@ -147,7 +147,7 @@ __kernel void 
remap_2_32FC1(__global const uchar * srcptr, int src_step, int src ST nVal) { int x = get_global_id(0); - int y = get_global_id(1) * rowsPerWI; + int y = get_global_id(1) * ROWS_PER_WI; if (x < dst_cols) { @@ -158,7 +158,7 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset)); #pragma unroll - for (int i = 0; i < rowsPerWI; ++i, ++y, + for (int i = 0; i < ROWS_PER_WI; ++i, ++y, map1_index += map1_step, map2_index += map2_step, dst_index += dst_step) if (y < dst_rows) { @@ -193,7 +193,7 @@ __kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_o ST nVal) { int x = get_global_id(0); - int y = get_global_id(1) * rowsPerWI; + int y = get_global_id(1) * ROWS_PER_WI; if (x < dst_cols) { @@ -202,7 +202,7 @@ __kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_o int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset)); #pragma unroll - for (int i = 0; i < rowsPerWI; ++i, ++y, + for (int i = 0; i < ROWS_PER_WI; ++i, ++y, map_index += map_step, dst_index += dst_step) if (y < dst_rows) { @@ -233,7 +233,7 @@ __kernel void remap_16SC2(__global const uchar * srcptr, int src_step, int src_o ST nVal) { int x = get_global_id(0); - int y = get_global_id(1) * rowsPerWI; + int y = get_global_id(1) * ROWS_PER_WI; if (x < dst_cols) { @@ -242,7 +242,7 @@ __kernel void remap_16SC2(__global const uchar * srcptr, int src_step, int src_o int map_index = mad24(y, map_step, mad24(x, (int)sizeof(short2), map_offset)); #pragma unroll - for (int i = 0; i < rowsPerWI; ++i, ++y, + for (int i = 0; i < ROWS_PER_WI; ++i, ++y, map_index += map_step, dst_index += dst_step) if (y < dst_rows) { @@ -274,7 +274,7 @@ __kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int ST nVal) { int x = get_global_id(0); - int y = get_global_id(1) * rowsPerWI; + int y = get_global_id(1) * ROWS_PER_WI; if (x < dst_cols) { @@ -284,7 
+284,7 @@ __kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset)); #pragma unroll - for (int i = 0; i < rowsPerWI; ++i, ++y, + for (int i = 0; i < ROWS_PER_WI; ++i, ++y, map1_index += map1_step, map2_index += map2_step, dst_index += dst_step) if (y < dst_rows) { @@ -330,17 +330,17 @@ __kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int ST nVal) { int x = get_global_id(0); - int y = get_global_id(1) * rowsPerWI; + int y = get_global_id(1) * ROWS_PER_WI; if (x < dst_cols) { - WT scalar = convertToWT(convertScalar(nVal)); + WT scalar = CONVERT_TO_WT(convertScalar(nVal)); int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset)); int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(short2), map1_offset)); int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset)); #pragma unroll - for (int i = 0; i < rowsPerWI; ++i, ++y, + for (int i = 0; i < ROWS_PER_WI; ++i, ++y, map1_index += map1_step, map2_index += map2_step, dst_index += dst_step) if (y < dst_rows) { @@ -359,22 +359,22 @@ __kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int WT a = scalar, b = scalar, c = scalar, d = scalar; if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y)) - a = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); + a = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataA, a); if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y)) - b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); + b = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataB, b); if (!NEED_EXTRAPOLATION(map_dataC.x, 
map_dataC.y)) - c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); + c = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataC, c); if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y)) - d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); + d = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataD, d); @@ -382,7 +382,7 @@ __kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int b * (u.x) * (1 - u.y) + c * (1 - u.x) * (u.y) + d * (u.x) * (u.y); - storepix(convertToT(dst_data), dst); + storepix(CONVERT_TO_T(dst_data), dst); } } } @@ -394,17 +394,17 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src ST nVal) { int x = get_global_id(0); - int y = get_global_id(1) * rowsPerWI; + int y = get_global_id(1) * ROWS_PER_WI; if (x < dst_cols) { - WT scalar = convertToWT(convertScalar(nVal)); + WT scalar = CONVERT_TO_WT(convertScalar(nVal)); int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset)); int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset)); int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset)); #pragma unroll - for (int i = 0; i < rowsPerWI; ++i, ++y, + for (int i = 0; i < ROWS_PER_WI; ++i, ++y, map1_index += map1_step, map2_index += map2_step, dst_index += dst_step) if (y < dst_rows) { @@ -431,13 +431,13 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src xsum = (WT)(0); if (sx >= 0 && sx + 2 < src_cols) { -#if depth == 0 && cn == 1 +#if SRC_DEPTH == 0 && CN == 1 uchar2 value = vload2(0, srcptr + src_index); xsum = dot(convert_float2(value), (float2)(coeffs_x[0], coeffs_x[1])); #else #pragma unroll for 
(int xp = 0; xp < 2; ++xp) - xsum = fma(convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))), coeffs_x[xp], xsum); + xsum = fma(CONVERT_TO_WT(loadpix(srcptr + mad24(xp, TSIZE, src_index))), coeffs_x[xp], xsum); #endif } else @@ -445,7 +445,7 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src #pragma unroll for (int xp = 0; xp < 2; ++xp) xsum = fma(sx + xp >= 0 && sx + xp < src_cols ? - convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))) : scalar, coeffs_x[xp], xsum); + CONVERT_TO_WT(loadpix(srcptr + mad24(xp, TSIZE, src_index))) : scalar, coeffs_x[xp], xsum); } sum = fma(xsum, coeffs_y[yp], sum); } @@ -453,7 +453,7 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src sum = fma(scalar, coeffs_y[yp], sum); } - storepix(convertToT(sum), dst); + storepix(CONVERT_TO_T(sum), dst); #else float2 map_data = (float2)(map1[0], map2[0]); @@ -463,27 +463,27 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1); float2 _u = map_data - convert_float2(map_dataA); - WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE; - WT scalar = convertToWT(convertScalar(nVal)); + WT2 u = CONVERT_TO_WT2(convert_int2_rte(CONVERT_TO_WT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE; + WT scalar = CONVERT_TO_WT(convertScalar(nVal)); WT a = scalar, b = scalar, c = scalar, d = scalar; if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y)) - a = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); + a = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataA, a); if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y)) - b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); + b = 
CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataB, b); if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y)) - c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); + c = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataC, c); if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y)) - d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); + d = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataD, d); @@ -491,7 +491,7 @@ __kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src b * (u.x) * (1 - u.y) + c * (1 - u.x) * (u.y) + d * (u.x) * (u.y); - storepix(convertToT(dst_data), dst); + storepix(CONVERT_TO_T(dst_data), dst); #endif } } @@ -503,16 +503,16 @@ __kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_o ST nVal) { int x = get_global_id(0); - int y = get_global_id(1) * rowsPerWI; + int y = get_global_id(1) * ROWS_PER_WI; if (x < dst_cols) { - WT scalar = convertToWT(convertScalar(nVal)); + WT scalar = CONVERT_TO_WT(convertScalar(nVal)); int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset)); int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset)); #pragma unroll - for (int i = 0; i < rowsPerWI; ++i, ++y, + for (int i = 0; i < ROWS_PER_WI; ++i, ++y, map_index += map_step, dst_index += dst_step) if (y < dst_rows) { @@ -526,26 +526,26 @@ __kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_o int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1); float2 _u = map_data - convert_float2(map_dataA); - WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * 
(WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE; + WT2 u = CONVERT_TO_WT2(convert_int2_rte(CONVERT_TO_WT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE; WT a = scalar, b = scalar, c = scalar, d = scalar; if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y)) - a = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); + a = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataA, a); if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y)) - b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); + b = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataB, b); if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y)) - c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); + c = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataC, c); if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y)) - d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); + d = CONVERT_TO_WT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset)))); else EXTRAPOLATE(map_dataD, d); @@ -553,7 +553,7 @@ __kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_o b * (u.x) * (1 - u.y) + c * (1 - u.x) * (u.y) + d * (u.x) * (u.y); - storepix(convertToT(dst_data), dst); + storepix(CONVERT_TO_T(dst_data), dst); } } } diff --git a/modules/imgproc/src/opencl/resize.cl b/modules/imgproc/src/opencl/resize.cl index a28c59296e..e31c80dc3b 100644 --- a/modules/imgproc/src/opencl/resize.cl +++ b/modules/imgproc/src/opencl/resize.cl @@ 
-55,44 +55,44 @@ #define noconvert -#if cn != 3 +#if CN != 3 #define loadpix(addr) *(__global const T *)(addr) #define storepix(val, addr) *(__global T *)(addr) = val #define TSIZE (int)sizeof(T) #else #define loadpix(addr) vload3(0, (__global const T1 *)(addr)) #define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr)) -#define TSIZE (int)sizeof(T1)*cn +#define TSIZE (int)sizeof(T1)*CN #endif #if defined USE_SAMPLER -#if cn == 1 +#if CN == 1 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x #define INTERMEDIATE_TYPE float -#elif cn == 2 +#elif CN == 2 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy #define INTERMEDIATE_TYPE float2 -#elif cn == 3 +#elif CN == 3 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz #define INTERMEDIATE_TYPE float3 -#elif cn == 4 +#elif CN == 4 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z) #define INTERMEDIATE_TYPE float4 #endif #define __CAT(x, y) x##y #define CAT(x, y) __CAT(x, y) -//#define INTERMEDIATE_TYPE CAT(float, cn) +//#define INTERMEDIATE_TYPE CAT(float, CN) #define float1 float -#if depth == 0 +#if SRC_DEPTH == 0 #define RESULT_SCALE 255.0f -#elif depth == 1 +#elif SRC_DEPTH == 1 #define RESULT_SCALE 127.0f -#elif depth == 2 +#elif SRC_DEPTH == 2 #define RESULT_SCALE 65535.0f -#elif depth == 3 +#elif SRC_DEPTH == 3 #define RESULT_SCALE 32767.0f #else #define RESULT_SCALE 1.0f @@ -114,10 +114,10 @@ __kernel void resizeSampler(__read_only image2d_t srcImage, INTERMEDIATE_TYPE intermediate = READ_IMAGE(srcImage, sampler, (float2)(sx, sy)); -#if depth <= 4 - T uval = convertToDT(round(intermediate * RESULT_SCALE)); +#if SRC_DEPTH <= 4 + T uval = CONVERT_TO_DT(round(intermediate * RESULT_SCALE)); #else - T uval = convertToDT(intermediate * RESULT_SCALE); + T uval = CONVERT_TO_DT(intermediate * RESULT_SCALE); #endif if(dx < dstcols && dy < dstrows) @@ -149,15 +149,15 @@ __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offs int src_index0 = mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset)), src_index1 
= mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset)); - WT data0 = convertToWT(loadpix(srcptr + src_index0)); - WT data1 = convertToWT(loadpix(srcptr + src_index0 + TSIZE)); - WT data2 = convertToWT(loadpix(srcptr + src_index1)); - WT data3 = convertToWT(loadpix(srcptr + src_index1 + TSIZE)); + WT data0 = CONVERT_TO_WT(loadpix(srcptr + src_index0)); + WT data1 = CONVERT_TO_WT(loadpix(srcptr + src_index0 + TSIZE)); + WT data2 = CONVERT_TO_WT(loadpix(srcptr + src_index1)); + WT data3 = CONVERT_TO_WT(loadpix(srcptr + src_index1 + TSIZE)); WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) + ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16); - storepix(convertToDT((val + 2) >> 2), + storepix(CONVERT_TO_DT((val + 2) >> 2), dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset))); } } @@ -186,7 +186,7 @@ __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offs int y_ = INC(y, src_rows); int x_ = INC(x, src_cols); -#if depth <= 1 // 8U/8S only, 16U+ cause integer overflows +#if SRC_DEPTH <= 1 // 8U/8S only, 16U+ cause integer overflows #define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS) #define CAST_BITS (INTER_RESIZE_COEF_BITS << 1) u = u * INTER_RESIZE_COEF_SCALE; @@ -197,24 +197,24 @@ __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offs int U1 = rint(INTER_RESIZE_COEF_SCALE - u); int V1 = rint(INTER_RESIZE_COEF_SCALE - v); - WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)))); - WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset)))); - WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset)))); - WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset)))); + WT data0 = CONVERT_TO_WT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)))); + WT data1 = CONVERT_TO_WT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset)))); + WT data2 = 
CONVERT_TO_WT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset)))); + WT data3 = CONVERT_TO_WT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset)))); WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) + mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3); - T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS); + T uval = CONVERT_TO_DT((val + (1<<(CAST_BITS-1)))>>CAST_BITS); #else float u1 = 1.f - u; float v1 = 1.f - v; - WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)))); - WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset)))); - WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset)))); - WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset)))); + WT data0 = CONVERT_TO_WT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)))); + WT data1 = CONVERT_TO_WT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset)))); + WT data2 = CONVERT_TO_WT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset)))); + WT data3 = CONVERT_TO_WT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset)))); - T uval = convertToDT((u1 * v1) * data0 + (u * v1) * data1 + (u1 * v) * data2 + (u * v) * data3); + T uval = CONVERT_TO_DT((u1 * v1) * data0 + (u * v1) * data1 + (u1 * v) * data2 + (u * v) * data3); #endif storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset))); } @@ -268,11 +268,11 @@ __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_ for (int px = 0; px < XSCALE; ++px) { int x = min(sx + px, src_cols - 1); - sum += convertToWTV(loadpix(src + src_index + x*TSIZE)); + sum += CONVERT_TO_WTV(loadpix(src + src_index + x*TSIZE)); } } - storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index)); + storepix(CONVERT_TO_T(CONVERT_TO_WT2V(sum) * (WT2V)(SCALE)), 
dst + mad24(dx, TSIZE, dst_index)); } } @@ -314,12 +314,12 @@ __kernel void resizeAREA(__global const uchar * src, int src_step, int src_offse for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk) { WTV alpha = (WTV)(xalpha_tab[xk]); - buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha; + buf += CONVERT_TO_WTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha; } sum += buf * beta; } - storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index)); + storepix(CONVERT_TO_T(sum), dst + mad24(dx, TSIZE, dst_index)); } } diff --git a/modules/imgproc/src/opencl/warp_affine.cl b/modules/imgproc/src/opencl/warp_affine.cl index bfbd0a6bfb..d937e237ff 100644 --- a/modules/imgproc/src/opencl/warp_affine.cl +++ b/modules/imgproc/src/opencl/warp_affine.cl @@ -66,7 +66,7 @@ #define ST T #endif -#if cn != 3 +#if CN != 3 #define loadpix(addr) *(__global const T*)(addr) #define storepix(val, addr) *(__global T*)(addr) = val #define scalar scalar_ @@ -89,7 +89,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of __constant CT * M, ST scalar_) { int dx = get_global_id(0); - int dy0 = get_global_id(1) * rowsPerWI; + int dy0 = get_global_id(1) * ROWS_PER_WI; if (dx < dst_cols) { @@ -99,7 +99,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of int Y0_ = rint(M[3] * dx * AB_SCALE); int dst_index = mad24(dy0, dst_step, mad24(dx, pixsize, dst_offset)); - for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy, dst_index += dst_step) + for (int dy = dy0, dy1 = min(dst_rows, dy0 + ROWS_PER_WI); dy < dy1; ++dy, dst_index += dst_step) { int X0 = X0_ + rint(fma(M[1], (CT)dy, M[2]) * AB_SCALE) + round_delta; int Y0 = Y0_ + rint(fma(M[4], (CT)dy, M[5]) * AB_SCALE) + round_delta; @@ -133,7 +133,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of __constant CT * M, ST scalar_) { int dx = get_global_id(0); - int dy0 = get_global_id(1) * rowsPerWI; + int dy0 = 
get_global_id(1) * ROWS_PER_WI; if (dx < dst_cols) { @@ -141,7 +141,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of int X0_ = rint(M[0] * tmp); int Y0_ = rint(M[3] * tmp); - for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy) + for (int dy = dy0, dy1 = min(dst_rows, dy0 + ROWS_PER_WI); dy < dy1; ++dy) { int X0 = X0_ + rint(fma(M[1], (CT)dy, M[2]) * AB_SCALE) + ROUND_DELTA; int Y0 = Y0_ + rint(fma(M[4], (CT)dy, M[5]) * AB_SCALE) + ROUND_DELTA; @@ -151,21 +151,21 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of short sx = convert_short_sat(X0 >> INTER_BITS), sy = convert_short_sat(Y0 >> INTER_BITS); short ax = convert_short(X0 & (INTER_TAB_SIZE-1)), ay = convert_short(Y0 & (INTER_TAB_SIZE-1)); -#if defined AMD_DEVICE || depth > 4 +#if defined AMD_DEVICE || SRC_DEPTH > 4 WT v0 = scalar, v1 = scalar, v2 = scalar, v3 = scalar; if (sx >= 0 && sx < src_cols) { if (sy >= 0 && sy < src_rows) - v0 = convertToWT(loadpix(srcptr + mad24(sy, src_step, mad24(sx, pixsize, src_offset)))); + v0 = CONVERT_TO_WT(loadpix(srcptr + mad24(sy, src_step, mad24(sx, pixsize, src_offset)))); if (sy+1 >= 0 && sy+1 < src_rows) - v2 = convertToWT(loadpix(srcptr + mad24(sy+1, src_step, mad24(sx, pixsize, src_offset)))); + v2 = CONVERT_TO_WT(loadpix(srcptr + mad24(sy+1, src_step, mad24(sx, pixsize, src_offset)))); } if (sx+1 >= 0 && sx+1 < src_cols) { if (sy >= 0 && sy < src_rows) - v1 = convertToWT(loadpix(srcptr + mad24(sy, src_step, mad24(sx+1, pixsize, src_offset)))); + v1 = CONVERT_TO_WT(loadpix(srcptr + mad24(sy, src_step, mad24(sx+1, pixsize, src_offset)))); if (sy+1 >= 0 && sy+1 < src_rows) - v3 = convertToWT(loadpix(srcptr + mad24(sy+1, src_step, mad24(sx+1, pixsize, src_offset)))); + v3 = CONVERT_TO_WT(loadpix(srcptr + mad24(sy+1, src_step, mad24(sx+1, pixsize, src_offset)))); } float taby = 1.f/INTER_TAB_SIZE*ay; @@ -173,18 +173,18 @@ __kernel void warpAffine(__global const uchar * srcptr, int 
src_step, int src_of int dst_index = mad24(dy, dst_step, mad24(dx, pixsize, dst_offset)); -#if depth <= 4 +#if SRC_DEPTH <= 4 int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE ); int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE ); WT val = mad24(v0, itab0, mad24(v1, itab1, mad24(v2, itab2, v3 * itab3))); - storepix(convertToT((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS), dstptr + dst_index); + storepix(CONVERT_TO_T((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS), dstptr + dst_index); #else float tabx2 = 1.0f - tabx, taby2 = 1.0f - taby; WT val = fma(tabx2, fma(v0, taby2, v2 * taby), tabx * fma(v1, taby2, v3 * taby)); - storepix(convertToT(val), dstptr + dst_index); + storepix(CONVERT_TO_T(val), dstptr + dst_index); #endif #else // INTEL_DEVICE __constant float * coeffs_y = coeffs + (ay << 1), * coeffs_x = coeffs + (ax << 1); @@ -202,13 +202,13 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of xsum = (WT)(0); if (sx >= 0 && sx + 2 < src_cols) { -#if depth == 0 && cn == 1 +#if SRC_DEPTH == 0 && CN == 1 uchar2 value = vload2(0, srcptr + src_index); xsum = dot(convert_float2(value), (float2)(coeffs_x[0], coeffs_x[1])); #else #pragma unroll for (int x = 0; x < 2; x++) - xsum = fma(convertToWT(loadpix(srcptr + mad24(x, pixsize, src_index))), coeffs_x[x], xsum); + xsum = fma(CONVERT_TO_WT(loadpix(srcptr + mad24(x, pixsize, src_index))), coeffs_x[x], xsum); #endif } else @@ -216,7 +216,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of #pragma unroll for (int x = 0; x < 2; x++) xsum = fma(sx + x >= 0 && sx + x < src_cols ? 
- convertToWT(loadpix(srcptr + mad24(x, pixsize, src_index))) : scalar, coeffs_x[x], xsum); + CONVERT_TO_WT(loadpix(srcptr + mad24(x, pixsize, src_index))) : scalar, coeffs_x[x], xsum); } sum = fma(xsum, coeffs_y[y], sum); } @@ -224,7 +224,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of sum = fma(scalar, coeffs_y[y], sum); } - storepix(convertToT(sum), dstptr + dst_index); + storepix(CONVERT_TO_T(sum), dstptr + dst_index); #endif } } @@ -290,7 +290,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of #pragma unroll for (int x = 0; x < 4; x++) v[mad24(y, 4, x)] = sx+x >= 0 && sx+x < src_cols ? - convertToWT(loadpix(srcptr + mad24(sy+y, src_step, mad24(sx+x, pixsize, src_offset)))) : scalar; + CONVERT_TO_WT(loadpix(srcptr + mad24(sy+y, src_step, mad24(sx+x, pixsize, src_offset)))) : scalar; } else { @@ -310,7 +310,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of int dst_index = mad24(dy, dst_step, mad24(dx, pixsize, dst_offset)); WT sum = (WT)(0); -#if depth <= 4 +#if SRC_DEPTH <= 4 int itab[16]; #pragma unroll @@ -320,12 +320,12 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of #pragma unroll for (int i = 0; i < 16; i++) sum = mad24(v[i], itab[i], sum); - storepix(convertToT( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ), dstptr + dst_index); + storepix(CONVERT_TO_T( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ), dstptr + dst_index); #else #pragma unroll for (int i = 0; i < 16; i++) sum = fma(v[i], tab1y[(i>>2)] * tab1x[(i&3)], sum); - storepix(convertToT( sum ), dstptr + dst_index); + storepix(CONVERT_TO_T( sum ), dstptr + dst_index); #endif #else // INTEL_DEVICE __constant float * coeffs_y = coeffs + (ay << 2), * coeffs_x = coeffs + (ax << 2); @@ -343,13 +343,13 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of xsum = (WT)(0); if (sx >= 0 && sx + 4 
< src_cols) { -#if depth == 0 && cn == 1 +#if SRC_DEPTH == 0 && CN == 1 uchar4 value = vload4(0, srcptr + src_index); xsum = dot(convert_float4(value), (float4)(coeffs_x[0], coeffs_x[1], coeffs_x[2], coeffs_x[3])); #else #pragma unroll for (int x = 0; x < 4; x++) - xsum = fma(convertToWT(loadpix(srcptr + mad24(x, pixsize, src_index))), coeffs_x[x], xsum); + xsum = fma(CONVERT_TO_WT(loadpix(srcptr + mad24(x, pixsize, src_index))), coeffs_x[x], xsum); #endif } else @@ -357,7 +357,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of #pragma unroll for (int x = 0; x < 4; x++) xsum = fma(sx + x >= 0 && sx + x < src_cols ? - convertToWT(loadpix(srcptr + mad24(x, pixsize, src_index))) : scalar, coeffs_x[x], xsum); + CONVERT_TO_WT(loadpix(srcptr + mad24(x, pixsize, src_index))) : scalar, coeffs_x[x], xsum); } sum = fma(xsum, coeffs_y[y], sum); } @@ -365,7 +365,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of sum = fma(scalar, coeffs_y[y], sum); } - storepix(convertToT(sum), dstptr + dst_index); + storepix(CONVERT_TO_T(sum), dstptr + dst_index); #endif } } diff --git a/modules/imgproc/src/opencl/warp_perspective.cl b/modules/imgproc/src/opencl/warp_perspective.cl index 20e3a27404..06bc2bd7fa 100644 --- a/modules/imgproc/src/opencl/warp_perspective.cl +++ b/modules/imgproc/src/opencl/warp_perspective.cl @@ -65,7 +65,7 @@ #define ST T #endif -#if cn != 3 +#if CN != 3 #define loadpix(addr) *(__global const T*)(addr) #define storepix(val, addr) *(__global T*)(addr) = val #define scalar scalar_ @@ -134,31 +134,31 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s short ax = (short)(X & (INTER_TAB_SIZE - 1)); WT v0 = (sx >= 0 && sx < src_cols && sy >= 0 && sy < src_rows) ? 
- convertToWT(loadpix(srcptr + mad24(sy, src_step, src_offset + sx * pixsize))) : scalar; + CONVERT_TO_WT(loadpix(srcptr + mad24(sy, src_step, src_offset + sx * pixsize))) : scalar; WT v1 = (sx+1 >= 0 && sx+1 < src_cols && sy >= 0 && sy < src_rows) ? - convertToWT(loadpix(srcptr + mad24(sy, src_step, src_offset + (sx+1) * pixsize))) : scalar; + CONVERT_TO_WT(loadpix(srcptr + mad24(sy, src_step, src_offset + (sx+1) * pixsize))) : scalar; WT v2 = (sx >= 0 && sx < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? - convertToWT(loadpix(srcptr + mad24(sy+1, src_step, src_offset + sx * pixsize))) : scalar; + CONVERT_TO_WT(loadpix(srcptr + mad24(sy+1, src_step, src_offset + sx * pixsize))) : scalar; WT v3 = (sx+1 >= 0 && sx+1 < src_cols && sy+1 >= 0 && sy+1 < src_rows) ? - convertToWT(loadpix(srcptr + mad24(sy+1, src_step, src_offset + (sx+1) * pixsize))) : scalar; + CONVERT_TO_WT(loadpix(srcptr + mad24(sy+1, src_step, src_offset + (sx+1) * pixsize))) : scalar; float taby = 1.f/INTER_TAB_SIZE*ay; float tabx = 1.f/INTER_TAB_SIZE*ax; int dst_index = mad24(dy, dst_step, dst_offset + dx * pixsize); -#if depth <= 4 +#if SRC_DEPTH <= 4 int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE ); int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE ); WT val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3; - storepix(convertToT((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS), dstptr + dst_index); + storepix(CONVERT_TO_T((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS), dstptr + dst_index); #else float tabx2 = 1.0f - tabx, taby2 = 1.0f - taby; WT val = v0 * tabx2 * taby2 + v1 * tabx * taby2 + v2 * tabx2 * taby + v3 * tabx * taby; - storepix(convertToT(val), dstptr + dst_index); + storepix(CONVERT_TO_T(val), dstptr + dst_index); #endif } } @@ 
-201,7 +201,7 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s #pragma unroll for (int x = 0; x < 4; x++) v[mad24(y, 4, x)] = (sx+x >= 0 && sx+x < src_cols && sy+y >= 0 && sy+y < src_rows) ? - convertToWT(loadpix(srcptr + mad24(sy+y, src_step, src_offset + (sx+x) * pixsize))) : scalar; + CONVERT_TO_WT(loadpix(srcptr + mad24(sy+y, src_step, src_offset + (sx+x) * pixsize))) : scalar; float tab1y[4], tab1x[4]; @@ -213,7 +213,7 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s int dst_index = mad24(dy, dst_step, dst_offset + dx * pixsize); WT sum = (WT)(0); -#if depth <= 4 +#if SRC_DEPTH <= 4 int itab[16]; #pragma unroll @@ -223,12 +223,12 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s #pragma unroll for (int i = 0; i < 16; i++) sum += v[i] * itab[i]; - storepix(convertToT( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ), dstptr + dst_index); + storepix(CONVERT_TO_T( (sum + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS ), dstptr + dst_index); #else #pragma unroll for (int i = 0; i < 16; i++) sum += v[i] * tab1y[(i>>2)] * tab1x[(i&3)]; - storepix(convertToT( sum ), dstptr + dst_index); + storepix(CONVERT_TO_T( sum ), dstptr + dst_index); #endif } } diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index f65ae62158..52f957348d 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -1189,8 +1189,8 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in "BORDER_REFLECT_101" }; char cvt[2][50]; String buildOptions = format( - "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " - "-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d", + "-D T=%s -D FT=%s -D CONVERT_TO_T=%s -D CONVERT_TO_FT=%s%s " + "-D T1=%s -D CN=%d -D KERCN=%d -D FDEPTH=%d -D %s -D LOCAL_SIZE=%d", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, 
cn)), ocl::convertTypeStr(float_depth, depth, cn, cvt[0], sizeof(cvt[0])), ocl::convertTypeStr(depth, float_depth, cn, cvt[1], sizeof(cvt[1])), @@ -1232,8 +1232,8 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int const int local_size = channels == 1 ? 16 : 8; char cvt[2][50]; String buildOptions = format( - "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " - "-D T1=%s -D cn=%d -D LOCAL_SIZE=%d", + "-D T=%s -D FT=%s -D CONVERT_TO_T=%s -D CONVERT_TO_FT=%s%s " + "-D T1=%s -D CN=%d -D LOCAL_SIZE=%d", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), ocl::convertTypeStr(float_depth, depth, channels, cvt[0], sizeof(cvt[0])), ocl::convertTypeStr(depth, float_depth, channels, cvt[1], sizeof(cvt[1])), diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index 8dc7233aab..0943e97ead 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -3396,8 +3396,8 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, { int wdepth = std::max(depth, CV_32S); char buf[2][50]; - cv::String compileOpts = format("-D USE_SAMPLER -D depth=%d -D T=%s -D T1=%s " - "-D convertToDT=%s -D cn=%d", + cv::String compileOpts = format("-D USE_SAMPLER -D SRC_DEPTH=%d -D T=%s -D T1=%s " + "-D CONVERT_TO_DT=%s -D CN=%d", depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::convertTypeStr(wdepth, depth, cn, buf[1], sizeof(buf[1])), cn); @@ -3461,8 +3461,8 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, Mat(1, static_cast<int>(_buffer.size()), CV_8UC1, _buffer.data()).copyTo(coeffs); k.create("resizeLN", ocl::imgproc::resize_oclsrc, - format("-D INTER_LINEAR_INTEGER -D depth=%d -D T=%s -D T1=%s " - "-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d " + format("-D INTER_LINEAR_INTEGER -D SRC_DEPTH=%d -D T=%s -D T1=%s " + "-D WT=%s -D CONVERT_TO_WT=%s -D CONVERT_TO_DT=%s -D CN=%d " "-D INTER_RESIZE_COEF_BITS=%d", depth, ocl::typeToStr(type),
ocl::typeToStr(depth), ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, buf[0], sizeof(buf[0])), @@ -3479,8 +3479,8 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, int wdepth = depth <= CV_8S ? CV_32S : std::max(depth, CV_32F); int wtype = CV_MAKETYPE(wdepth, cn); k.create("resizeLN", ocl::imgproc::resize_oclsrc, - format("-D INTER_LINEAR -D depth=%d -D T=%s -D T1=%s " - "-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d " + format("-D INTER_LINEAR -D SRC_DEPTH=%d -D T=%s -D T1=%s " + "-D WT=%s -D CONVERT_TO_WT=%s -D CONVERT_TO_DT=%s -D CN=%d " "-D INTER_RESIZE_COEF_BITS=%d", depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, buf[0], sizeof(buf[0])), @@ -3496,7 +3496,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, else if (interpolation == INTER_NEAREST) { k.create("resizeNN", ocl::imgproc::resize_oclsrc, - format("-D INTER_NEAREST -D T=%s -D T1=%s -D cn=%d", + format("-D INTER_NEAREST -D T=%s -D T1=%s -D CN=%d", ocl::vecopTypeToStr(type), ocl::vecopTypeToStr(depth), cn)); if (k.empty()) return false; @@ -3510,7 +3510,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, int wtype = CV_MAKE_TYPE(wdepth, cn); char cvt[2][50]; - String buildOption = format("-D INTER_AREA -D T=%s -D T1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d", + String buildOption = format("-D INTER_AREA -D T=%s -D T1=%s -D WTV=%s -D CONVERT_TO_WTV=%s -D CN=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt[0], sizeof(cvt[0])), cn); @@ -3520,7 +3520,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, if (is_area_fast) { int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn); - buildOption = buildOption + format(" -D convertToT=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST" + buildOption = buildOption + format(" -D CONVERT_TO_T=%s -D 
WT2V=%s -D CONVERT_TO_WT2V=%s -D INTER_AREA_FAST" " -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff", ocl::convertTypeStr(wdepth2, depth, cn, cvt[0], sizeof(cvt[0])), ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1], sizeof(cvt[1])), @@ -3532,7 +3532,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, } else { - buildOption = buildOption + format(" -D convertToT=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0], sizeof(cvt[0]))); + buildOption = buildOption + format(" -D CONVERT_TO_T=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0], sizeof(cvt[0]))); k.create("resizeAREA", ocl::imgproc::resize_oclsrc, buildOption); if (k.empty()) return false;