From 716218cac4009a67eb95fc2570990c9e7a7fe266 Mon Sep 17 00:00:00 2001 From: vbystricky Date: Tue, 22 Jul 2014 13:22:40 +0400 Subject: [PATCH] Optimize ocl version of warp_affine --- modules/imgproc/src/opencl/warp_affine.cl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/imgproc/src/opencl/warp_affine.cl b/modules/imgproc/src/opencl/warp_affine.cl index 8ee34d0d65..649f10db7a 100644 --- a/modules/imgproc/src/opencl/warp_affine.cl +++ b/modules/imgproc/src/opencl/warp_affine.cl @@ -98,15 +98,15 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of { int round_delta = (AB_SCALE >> 1); - int X0_ = rint(M[0] * dx * AB_SCALE); - int Y0_ = rint(M[3] * dx * AB_SCALE); + int X0 = rint(fma(M[0], dx, fma(M[1], dy0, M[2])) * AB_SCALE) + round_delta; + int Y0 = rint(fma(M[3], dx, fma(M[4], dy0, M[5])) * AB_SCALE) + round_delta; + + int XSTEP = (int)(M[1] * AB_SCALE); + int YSTEP = (int)(M[4] * AB_SCALE); int dst_index = mad24(dy0, dst_step, mad24(dx, pixsize, dst_offset)); for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy, dst_index += dst_step) { - int X0 = X0_ + rint(fma(M[1], dy, M[2]) * AB_SCALE) + round_delta; - int Y0 = Y0_ + rint(fma(M[4], dy, M[5]) * AB_SCALE) + round_delta; - short sx = convert_short_sat(X0 >> AB_BITS); short sy = convert_short_sat(Y0 >> AB_BITS); @@ -117,6 +117,9 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of } else storepix(scalar, dstptr + dst_index); + + X0 += XSTEP; + Y0 += YSTEP; } } }