From d1f6a23abf25f7d75f8e297932da28bbd5ef8e37 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 22 Aug 2013 12:50:30 +0400 Subject: [PATCH] improved texture usage: * use explicit extrapolation only for sub-matrixes * use built-in interpolation for INTER_NEAREST mode --- modules/gpu/src/cuda/resize.cu | 41 ++++++++++++++++------------------ 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index ce4d96c248..6ecb7eb8ba 100644 --- a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -220,17 +220,21 @@ namespace cv { namespace gpu { namespace device { \ const dim3 block(32, 8); \ const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \ - bindTexture(&tex_resize_ ## type, srcWhole); \ - tex_resize_ ## type ## _reader texSrc; \ - texSrc.xoff = xoff; \ - texSrc.yoff = yoff; \ - if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \ + if (srcWhole.data == src.data) \ { \ + bindTexture(&tex_resize_ ## type, src); \ + tex_resize_ ## type ## _reader texSrc; \ + texSrc.xoff = 0; \ + texSrc.yoff = 0; \ Filter filteredSrc(texSrc); \ resize<<>>(filteredSrc, fx, fy, dst); \ } \ else \ { \ + bindTexture(&tex_resize_ ## type, srcWhole); \ + tex_resize_ ## type ## _reader texSrc; \ + texSrc.xoff = xoff; \ + texSrc.yoff = yoff; \ BrdReplicate< type > brd(src.rows, src.cols); \ BorderReader > brdSrc(texSrc, brd); \ Filter< BorderReader > > filteredSrc(brdSrc); \ @@ -250,18 +254,7 @@ namespace cv { namespace gpu { namespace device tex_resize_ ## type ## _reader texSrc; \ texSrc.xoff = xoff; \ texSrc.yoff = yoff; \ - if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \ - { \ - PointFilter filteredSrc(texSrc); \ - resize<<>>(filteredSrc, fx, fy, dst); \ - } \ - else \ - { \ - BrdReplicate< type > brd(src.rows, src.cols); \ - BorderReader > brdSrc(texSrc, brd); \ - PointFilter< BorderReader > > filteredSrc(brdSrc); \ - resize<<>>(filteredSrc, fx, fy, dst); \ - } \ + resize<<>>(texSrc, fx, fy, dst); \ cudaSafeCall( cudaGetLastError() ); \ cudaSafeCall( cudaDeviceSynchronize() ); \ } \ @@ -272,17 +265,21 @@ namespace cv { namespace gpu { namespace device { \ const dim3 block(32, 8); \ const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \ - bindTexture(&tex_resize_ ## type, srcWhole); \ - tex_resize_ ## type ## _reader texSrc; \ - texSrc.xoff = xoff; \ - texSrc.yoff = yoff; \ - if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \ + if (srcWhole.data == src.data) \ { \ + bindTexture(&tex_resize_ ## type, src); \ + tex_resize_ ## type ## _reader texSrc; \ + texSrc.xoff = 0; \ + texSrc.yoff = 0; \ LinearFilter filteredSrc(texSrc); \ resize<<>>(filteredSrc, fx, fy, dst); \ } \ else \ { \ + bindTexture(&tex_resize_ ## type, srcWhole); \ + tex_resize_ ## type ## _reader texSrc; \ + texSrc.xoff = xoff; \ + texSrc.yoff = yoff; \ BrdReplicate< type > brd(src.rows, src.cols); \ BorderReader > brdSrc(texSrc, brd); \ LinearFilter< BorderReader > > filteredSrc(brdSrc); \