diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index 08e749f8b6..dddbd6f7e8 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -280,13 +280,19 @@ namespace cv { namespace gpu { namespace mathfunc enum { UN_OP_NOT }; template - struct UnOp { __device__ T operator()(T lhs, T rhs); }; + struct UnOp; template - struct UnOp{ __device__ T operator()(T x) { return ~x; } }; + struct UnOp + { + static __device__ T call(T x) + { + return ~x; + } + }; template - __global__ void bitwise_un_op(int rows, int cols, const PtrStep src, PtrStep dst, UnOp op, Mask mask) + __global__ void bitwise_un_op(int rows, int cols, const PtrStep src, PtrStep dst, Mask mask) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -298,7 +304,7 @@ namespace cv { namespace gpu { namespace mathfunc #pragma unroll for (int i = 0; i < cn; ++i) - dsty[cn * x + i] = op(srcy[cn * x + i]); + dsty[cn * x + i] = UnOp::call(srcy[cn * x + i]); } } @@ -309,16 +315,36 @@ namespace cv { namespace gpu { namespace mathfunc dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); switch (elem_size) { - case 1: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 2: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 3: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 4: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 6: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 8: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 12: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 16: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 24: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; - case 32: bitwise_un_op<<>>(rows, cols, src, dst, UnOp(), mask); break; + case 1: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 2: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 3: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 4: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 6: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 8: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 12: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 16: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 24: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; + case 32: + bitwise_un_op ><<>>(rows, cols, src, dst, mask); + break; } if (stream == 0) cudaSafeCall(cudaThreadSynchronize()); } @@ -338,19 +364,37 @@ namespace cv { namespace gpu { namespace mathfunc enum { BIN_OP_OR, BIN_OP_AND, BIN_OP_XOR }; template - struct BinOp { __device__ T operator()(T lhs, T rhs); }; + struct BinOp; template - struct BinOp{ __device__ T operator()(T lhs, T rhs) { return lhs | rhs; } }; + struct BinOp + { + static __device__ T call(T lhs, T rhs) + { + return lhs | rhs; + } + }; template - struct BinOp{ __device__ T operator()(T lhs, T rhs) { return lhs & rhs; } }; + struct BinOp + { + static __device__ T call(T lhs, T rhs) + { + return lhs & rhs; + } + }; template - struct BinOp{ __device__ T operator()(T lhs, T rhs) { return lhs ^ rhs; } }; + struct BinOp + { + static __device__ T call(T lhs, T rhs) + { + return lhs ^ rhs; + } + }; template - __global__ void bitwise_bin_op(int rows, int cols, const PtrStep src1, const PtrStep src2, PtrStep dst, BinOp op, Mask mask) + __global__ void bitwise_bin_op(int rows, int cols, const PtrStep src1, const PtrStep src2, PtrStep dst, Mask mask) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; @@ -363,7 +407,7 @@ namespace cv { namespace gpu { namespace mathfunc #pragma unroll for (int i = 0; i < cn; ++i) - dsty[cn * x + i] = op(src1y[cn * x + i], src2y[cn * x + i]); + dsty[cn * x + i] = BinOp::call(src1y[cn * x + i], src2y[cn * x + i]); } } @@ -374,16 +418,36 @@ namespace cv { namespace gpu { namespace mathfunc dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); switch (elem_size) { - case 1: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 2: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 3: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 4: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 6: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 8: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 12: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 16: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 24: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; - case 32: bitwise_bin_op<<>>(rows, cols, src1, src2, dst, BinOp(), mask); break; + case 1: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 2: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 3: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 4: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 6: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 8: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 12: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 16: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 24: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; + case 32: + bitwise_bin_op ><<>>(rows, cols, src1, src2, dst, mask); + break; } if (stream == 0) cudaSafeCall(cudaThreadSynchronize()); }