diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 5a1859e42a..807285e983 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -95,6 +95,18 @@ CV_EXPORTS void pow(InputArray src, double power, OutputArray dst, Stream& strea //! compares elements of two arrays (dst = src1 src2) CV_EXPORTS void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()); +//! performs per-elements bit-wise inversion +CV_EXPORTS void bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +//! calculates per-element bit-wise disjunction of two arrays +CV_EXPORTS void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +//! calculates per-element bit-wise conjunction of two arrays +CV_EXPORTS void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +//! calculates per-element bit-wise "exclusive or" operation +CV_EXPORTS void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -105,27 +117,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! performs per-elements bit-wise inversion -CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise disjunction of two arrays -CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise disjunction of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise conjunction of two arrays -CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise conjunction of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise "exclusive or" operation -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise "exclusive or" of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - //! pixel by pixel right shift of an image by a constant value //! supports 1, 3 and 4 channels images with integers elements CV_EXPORTS void rshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 25e8b30010..61c4338528 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -71,16 +71,13 @@ void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_or(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } void cv::gpu::rshift(const GpuMat&, Scalar_, GpuMat&, Stream&) { throw_no_cuda(); } @@ -1889,25 +1886,29 @@ void cv::gpu::compare(InputArray src1, InputArray src2, OutputArray dst, int cmp } ////////////////////////////////////////////////////////////////////////////// -// Unary bitwise logical operations +// bitwise_not namespace arithm { template void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& s) +void cv::gpu::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& _stream) { using namespace arithm; + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); + const int depth = src.depth(); CV_Assert( depth <= CV_64F ); CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); const int bcols = (int) (src.cols * src.elemSize()); @@ -1941,6 +1942,16 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St ////////////////////////////////////////////////////////////////////////////// // Binary bitwise logical operations +namespace +{ + enum + { + BIT_OP_AND, + BIT_OP_OR, + BIT_OP_XOR + }; +} + namespace arithm { template void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); @@ -1948,113 +1959,31 @@ namespace arithm template void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s) -{ - using namespace arithm; - - const int depth = src1.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() ); - CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) ); - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const int bcols = (int) (src1.cols * src1.elemSize()); - - if ((bcols & 3) == 0) - { - const int vcols = bcols >> 2; - - bitMatAnd( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else if ((bcols & 1) == 0) - { - const int vcols = bcols >> 1; - - bitMatAnd( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else - { - - bitMatAnd( - PtrStepSzb(src1.rows, bcols, src1.data, src1.step), - PtrStepSzb(src1.rows, bcols, src2.data, src2.step), - PtrStepSzb(src1.rows, bcols, dst.data, dst.step), - mask, stream); - } -} - -void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s) +static void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int op) { using namespace arithm; - const int depth = src1.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() ); - CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) ); - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const int bcols = (int) (src1.cols * src1.elemSize()); - - if ((bcols & 3) == 0) + typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + static const func_t funcs32[] = { - const int vcols = bcols >> 2; - - bitMatOr( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else if ((bcols & 1) == 0) + bitMatAnd, + bitMatOr, + bitMatXor + }; + static const func_t funcs16[] = { - const int vcols = bcols >> 1; - - bitMatOr( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else + bitMatAnd, + bitMatOr, + bitMatXor + }; + static const func_t funcs8[] = { + bitMatAnd, + bitMatOr, + bitMatXor + }; - bitMatOr( - PtrStepSzb(src1.rows, bcols, src1.data, src1.step), - PtrStepSzb(src1.rows, bcols, src2.data, src2.step), - PtrStepSzb(src1.rows, bcols, dst.data, dst.step), - mask, stream); - } -} - -void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s) -{ - using namespace arithm; - - const int depth = src1.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() ); - CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) ); - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); const int bcols = (int) (src1.cols * src1.elemSize()); @@ -2062,8 +1991,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c { const int vcols = bcols >> 2; - bitMatXor( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), + funcs32[op](PtrStepSzb(src1.rows, vcols, src1.data, src1.step), PtrStepSzb(src1.rows, vcols, src2.data, src2.step), PtrStepSzb(src1.rows, vcols, dst.data, dst.step), mask, stream); @@ -2072,8 +2000,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c { const int vcols = bcols >> 1; - bitMatXor( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), + funcs16[op](PtrStepSzb(src1.rows, vcols, src1.data, src1.step), PtrStepSzb(src1.rows, vcols, src2.data, src2.step), PtrStepSzb(src1.rows, vcols, dst.data, dst.step), mask, stream); @@ -2081,17 +2008,13 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c else { - bitMatXor( - PtrStepSzb(src1.rows, bcols, src1.data, src1.step), - PtrStepSzb(src1.rows, bcols, src2.data, src2.step), - PtrStepSzb(src1.rows, bcols, dst.data, dst.step), - mask, stream); + funcs8[op](PtrStepSzb(src1.rows, bcols, src1.data, src1.step), + PtrStepSzb(src1.rows, bcols, src2.data, src2.step), + PtrStepSzb(src1.rows, bcols, dst.data, dst.step), + mask, stream); } } -////////////////////////////////////////////////////////////////////////////// -// Binary bitwise logical operations with scalars - namespace arithm { template void bitScalarAnd(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream); @@ -2179,18 +2102,34 @@ namespace }; } -void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) +static void bitScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op) { using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); - static const func_t funcs[5][4] = + static const func_t funcs[3][5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, - {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, - {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, + {0,0,0,0}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {0,0,0,0}, + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + }, + { + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, + {0,0,0,0}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {0,0,0,0}, + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + }, + { + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, + {0,0,0,0}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {0,0,0,0}, + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + } }; const int depth = src.depth(); @@ -2198,60 +2137,24 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + CV_Assert( mask.empty() ); - dst.create(src.size(), src.type()); - - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + funcs[op][depth][cn - 1](src, val, dst, StreamAccessor::getStream(stream)); } -void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) +void cv::gpu::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream) { - using namespace arithm; - - typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); - static const func_t funcs[5][4] = - { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, - {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, - {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} - }; - - const int depth = src.depth(); - const int cn = src.channels(); - - CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); - CV_Assert( cn == 1 || cn == 3 || cn == 4 ); - - dst.create(src.size(), src.type()); - - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_OR); } -void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) +void cv::gpu::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream) { - using namespace arithm; - - typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); - static const func_t funcs[5][4] = - { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, - {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, - {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} - }; - - const int depth = src.depth(); - const int cn = src.channels(); - - CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); - CV_Assert( cn == 1 || cn == 3 || cn == 4 ); - - dst.create(src.size(), src.type()); + arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_AND); +} - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); +void cv::gpu::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream) +{ + arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_XOR); } //////////////////////////////////////////////////////////////////////////////