diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 79fd37f08a..555fa7b86c 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -355,11 +355,11 @@ CV_EXPORTS void gemm(InputArray src1, InputArray src2, double alpha, //! performs per-element multiplication of two full (not packed) Fourier spectrums //! supports 32FC2 matrixes only (interleaved format) -CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream = Stream::Null()); +CV_EXPORTS void mulSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, bool conjB=false, Stream& stream = Stream::Null()); //! performs per-element multiplication of two full (not packed) Fourier spectrums //! supports 32FC2 matrixes only (interleaved format) -CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()); +CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()); //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. //! Param dft_size is the size of DFT transform. @@ -372,7 +372,7 @@ CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c //! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved. //! //! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format. -CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); +CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); struct CV_EXPORTS ConvolveBuf { diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp index e632bfe5bb..88af76a170 100644 --- a/modules/gpuarithm/src/arithm.cpp +++ b/modules/gpuarithm/src/arithm.cpp @@ -49,10 +49,10 @@ using namespace cv::gpu; void cv::gpu::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); } -void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); } +void cv::gpu::mulSpectrums(InputArray, InputArray, OutputArray, int, bool, Stream&) { throw_no_cuda(); } +void cv::gpu::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, float, bool, Stream&) { throw_no_cuda(); } -void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int, Stream&) { throw_no_cuda(); } +void cv::gpu::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); } void cv::gpu::ConvolveBuf::create(Size, Size) { throw_no_cuda(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_no_cuda(); } @@ -308,12 +308,12 @@ namespace cv { namespace gpu { namespace cudev #endif -void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream) +void cv::gpu::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream) { #ifndef HAVE_CUFFT - (void) a; - (void) b; - (void) c; + (void) _src1; + (void) _src2; + (void) _dst; (void) flags; (void) conjB; (void) stream; @@ -322,16 +322,19 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag (void) flags; typedef void (*Caller)(const PtrStep, const PtrStep, PtrStepSz, cudaStream_t stream); - static Caller callers[] = { cudev::mulSpectrums, cudev::mulSpectrums_CONJ }; - CV_Assert(a.type() == b.type() && a.type() == CV_32FC2); - CV_Assert(a.size() == b.size()); + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - c.create(a.size(), CV_32FC2); + CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 ); + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), CV_32FC2); + GpuMat dst = _dst.getGpuMat(); Caller caller = callers[(int)conjB]; - caller(a, b, c, StreamAccessor::getStream(stream)); + caller(src1, src2, dst, StreamAccessor::getStream(stream)); #endif } @@ -349,12 +352,12 @@ namespace cv { namespace gpu { namespace cudev #endif -void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream) +void cv::gpu::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream) { #ifndef HAVE_CUFFT - (void) a; - (void) b; - (void) c; + (void) _src1; + (void) _src2; + (void) _dst; (void) flags; (void) scale; (void) conjB; @@ -366,53 +369,57 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, typedef void (*Caller)(const PtrStep, const PtrStep, float scale, PtrStepSz, cudaStream_t stream); static Caller callers[] = { cudev::mulAndScaleSpectrums, cudev::mulAndScaleSpectrums_CONJ }; - CV_Assert(a.type() == b.type() && a.type() == CV_32FC2); - CV_Assert(a.size() == b.size()); + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - c.create(a.size(), CV_32FC2); + CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2); + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), CV_32FC2); + GpuMat dst = _dst.getGpuMat(); Caller caller = callers[(int)conjB]; - caller(a, b, scale, c, StreamAccessor::getStream(stream)); + caller(src1, src2, scale, dst, StreamAccessor::getStream(stream)); #endif } ////////////////////////////////////////////////////////////////////////////// // dft -void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stream& stream) +void cv::gpu::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream) { #ifndef HAVE_CUFFT - (void) src; - (void) dst; + (void) _src; + (void) _dst; (void) dft_size; (void) flags; (void) stream; throw_no_cuda(); #else + GpuMat src = _src.getGpuMat(); - CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); + CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 ); // We don't support unpacked output (in the case of real input) - CV_Assert(!(flags & DFT_COMPLEX_OUTPUT)); + CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) ); - bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1); - int is_row_dft = flags & DFT_ROWS; - int is_scaled_dft = flags & DFT_SCALE; - int is_inverse = flags & DFT_INVERSE; - bool is_complex_input = src.channels() == 2; - bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + const bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1); + const bool is_row_dft = (flags & DFT_ROWS) != 0; + const bool is_scaled_dft = (flags & DFT_SCALE) != 0; + const bool is_inverse = (flags & DFT_INVERSE) != 0; + const bool is_complex_input = src.channels() == 2; + const bool is_complex_output = !(flags & DFT_REAL_OUTPUT); // We don't support real-to-real transform - CV_Assert(is_complex_input || is_complex_output); + CV_Assert( is_complex_input || is_complex_output ); - GpuMat src_data; + GpuMat src_cont = src; // Make sure here we work with the continuous input, // as CUFFT can't handle gaps - src_data = src; - createContinuous(src.rows, src.cols, src.type(), src_data); - if (src_data.data != src.data) - src.copyTo(src_data); + createContinuous(src.rows, src.cols, src.type(), src_cont); + if (src_cont.data != src.data) + src.copyTo(src_cont, stream); Size dft_size_opt = dft_size; if (is_1d_input && !is_row_dft) @@ -422,17 +429,17 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stre dft_size_opt.height = std::min(dft_size.width, dft_size.height); } + CV_Assert( dft_size_opt.width > 1 ); + cufftType dft_type = CUFFT_R2C; if (is_complex_input) dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R; - CV_Assert(dft_size_opt.width > 1); - cufftHandle plan; if (is_1d_input || is_row_dft) - cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height); + cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) ); else - cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type); + cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) ); cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) ); @@ -440,34 +447,40 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stre { if (is_complex_output) { - createContinuous(dft_size, CV_32FC2, dst); + createContinuous(dft_size, CV_32FC2, _dst); + GpuMat dst = _dst.getGpuMat(); + cufftSafeCall(cufftExecC2C( - plan, src_data.ptr(), dst.ptr(), + plan, src_cont.ptr(), dst.ptr(), is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD)); } else { - createContinuous(dft_size, CV_32F, dst); + createContinuous(dft_size, CV_32F, _dst); + GpuMat dst = _dst.getGpuMat(); + cufftSafeCall(cufftExecC2R( - plan, src_data.ptr(), dst.ptr())); + plan, src_cont.ptr(), dst.ptr())); } } else { // We could swap dft_size for efficiency. Here we must reflect it if (dft_size == dft_size_opt) - createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst); + createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst); else - createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst); + createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst); + + GpuMat dst = _dst.getGpuMat(); cufftSafeCall(cufftExecR2C( - plan, src_data.ptr(), dst.ptr())); + plan, src_cont.ptr(), dst.ptr())); } - cufftSafeCall(cufftDestroy(plan)); + cufftSafeCall( cufftDestroy(plan) ); if (is_scaled_dft) - multiply(dst, Scalar::all(1. / dft_size.area()), dst, 1, -1, stream); + gpu::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream); #endif }