diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index a6ab7afb7e..4861207c0a 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3494,9 +3494,19 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL && ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL) { - bdepth = CV_32S; - kernelX.convertTo( kernelX, bdepth, 1 << shift_bits ); - kernelY.convertTo( kernelY, bdepth, 1 << shift_bits ); + if (ocl::Device::getDefault().isIntel()) + { + for (int i=0; i(0, i) = (float) cvRound(kernelX.at(0, i) * (1 << shift_bits)); + if (kernelX.data != kernelY.data) + for (int i=0; i(0, i) = (float) cvRound(kernelY.at(0, i) * (1 << shift_bits)); + } else + { + bdepth = CV_32S; + kernelX.convertTo( kernelX, bdepth, 1 << shift_bits ); + kernelY.convertTo( kernelY, bdepth, 1 << shift_bits ); + } int_arithm = true; } diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl index 13595058ff..afcdbea89c 100644 --- a/modules/imgproc/src/opencl/filterSepCol.cl +++ b/modules/imgproc/src/opencl/filterSepCol.cl @@ -97,15 +97,19 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse { temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x]; temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x]; -#ifndef INTEGER_ARITHMETIC - sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]); -#else +#if (defined(INTEGER_ARITHMETIC) && !INTEL_DEVICE) sum += mad24(temp[0],mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]); +#else + sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]); #endif } #ifdef INTEGER_ARITHMETIC +#ifdef INTEL_DEVICE + sum = (sum + (1 << (SHIFT_BITS-1))) / (1 << SHIFT_BITS); +#else sum = (sum + (1 << (SHIFT_BITS-1))) >> SHIFT_BITS; +#endif #endif // write the result to dst diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl index 472ac4c91e..8a317ae13d 100644 --- a/modules/imgproc/src/opencl/filterSepRow.cl +++ b/modules/imgproc/src/opencl/filterSepRow.cl @@ -141,12 +141,12 @@ #define DIG(a) a, __constant dstT1 mat_kernel[] = { COEFF }; -#ifndef INTEGER_ARITHMETIC -#define dstT4 float4 -#define convertDstVec convert_float4 -#else +#if (defined(INTEGER_ARITHMETIC) && !INTEL_DEVICE) #define dstT4 int4 #define convertDstVec convert_int4 +#else +#define dstT4 float4 +#define convertDstVec convert_float4 #endif __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel, int src_offset_x, int src_offset_y, @@ -263,10 +263,10 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel { temp[0] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset - i); temp[1] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset + i); -#ifndef INTEGER_ARITHMETIC - sum += mad(convertDstVec(temp[0]), mat_kernel[RADIUSX-i], convertDstVec(temp[1]) * mat_kernel[RADIUSX + i]); -#else +#if (defined(INTEGER_ARITHMETIC) && !INTEL_DEVICE) sum += mad24(convertDstVec(temp[0]), mat_kernel[RADIUSX-i], convertDstVec(temp[1]) * mat_kernel[RADIUSX + i]); +#else + sum += mad(convertDstVec(temp[0]), mat_kernel[RADIUSX-i], convertDstVec(temp[1]) * mat_kernel[RADIUSX + i]); #endif } @@ -368,10 +368,10 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse { temp[0] = LDS_DAT[l_y][l_x + RADIUSX - i]; temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i]; -#ifndef INTEGER_ARITHMETIC - sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]); -#else +#if (defined(INTEGER_ARITHMETIC) && !INTEL_DEVICE) sum += mad24(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]); +#else + sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]); #endif } diff --git a/modules/imgproc/src/opencl/filterSep_singlePass.cl b/modules/imgproc/src/opencl/filterSep_singlePass.cl index b8b812df46..3952577d77 100644 --- a/modules/imgproc/src/opencl/filterSep_singlePass.cl +++ b/modules/imgproc/src/opencl/filterSep_singlePass.cl @@ -162,10 +162,10 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int { sum = (WT) 0; for (i=0; i<=2*RADIUSY; i++) -#ifndef INTEGER_ARITHMETIC - sum = mad(lsmem[liy+i][clocX], mat_kernelY[i], sum); -#else +#if (defined(INTEGER_ARITHMETIC) && !INTEL_DEVICE) sum = mad24(lsmem[liy+i][clocX], mat_kernelY[i], sum); +#else + sum = mad(lsmem[liy+i][clocX], mat_kernelY[i], sum); #endif lsmemDy[liy][clocX] = sum; clocX += BLK_X; @@ -182,12 +182,18 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int // and calculate final result sum = 0.0f; for (i=0; i<=2*RADIUSX; i++) -#ifndef INTEGER_ARITHMETIC - sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum); -#else +#if (defined(INTEGER_ARITHMETIC) && !INTEL_DEVICE) sum = mad24(lsmemDy[liy][lix+i], mat_kernelX[i], sum); +#else + sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum); +#endif +#ifdef INTEGER_ARITHMETIC +#ifdef INTEL_DEVICE + sum = (sum + (1 << (SHIFT_BITS-1))) / (1 << SHIFT_BITS); +#else sum = (sum + (1 << (SHIFT_BITS-1))) >> SHIFT_BITS; +#endif #endif // store result into destination image