From 09735fd2080cfd7ded848095a1b37bd70b20b855 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Thu, 23 Dec 2010 09:24:33 +0000 Subject: [PATCH] added gpu::dft implemented via CUFFT --- modules/gpu/include/opencv2/gpu/gpu.hpp | 21 ++- modules/gpu/src/imgproc_gpu.cpp | 159 ++++++++++++++++++ tests/gpu/src/dft_routines.cpp | 211 +++++++++++++++++++++--- 3 files changed, 362 insertions(+), 29 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 71a053bab8..39836ec012 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -628,15 +628,28 @@ namespace cv //! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101); - //! performs per-element multiplication of two full (i.e. not packed) Fourier spectrums - //! supports only 32FC2 matrixes (interleaved format) + //! performs per-element multiplication of two full (not packed) Fourier spectrums + //! supports 32FC2 matrixes only (interleaved format) CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false); - //! performs per-element multiplication of two full (i.e. not packed) Fourier spectrums - //! supports only 32FC2 matrixes (interleaved format) + //! performs per-element multiplication of two full (not packed) Fourier spectrums + //! supports 32FC2 matrixes only (interleaved format) CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false); + //! performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix + //! + //! If the source matrix is not continous, then additional copy will be done, + //! so to avoid copying ensure the source matrix is continous one. + //! + //! Being implemented via CUFFT real-to-complex transform result contains only non-redundant values + //! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved. + //! + //! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format, which + //! doesn't allow us to retrieve parity of the destiantion matrix dimension (along which the first step + //! of DFT is performed). You must specifiy odd case explicitely. + CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, int flags=0, int nonZeroRows=0, bool odd=false); + //! computes convolution (or cross-correlation) of two images using discrete Fourier transform //! supports source images of 32FC1 type only //! result matrix will have 32FC1 type diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 4aaef14004..407e948ef9 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -76,6 +76,7 @@ void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { thro void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); } void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool) { throw_nogpu(); } void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool) { throw_nogpu(); } +void cv::gpu::dft(const GpuMat&, GpuMat&, int, int, bool) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); } @@ -1126,6 +1127,164 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, caller(a, b, scale, c); } +////////////////////////////////////////////////////////////////////////////// +// dft + +void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bool odd) +{ + CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); + + // We don't support unpacked output (in the case of real input) + CV_Assert(!(flags & DFT_COMPLEX_OUTPUT)); + + bool is_1d_input = (src.rows == 1) || (src.cols == 1); + int is_row_dft = flags & DFT_ROWS; + int is_scaled_dft = flags & DFT_SCALE; + int is_inverse = flags & DFT_INVERSE; + bool is_complex_input = src.channels() == 2; + bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + + // We don't support scaled transform + CV_Assert(!is_scaled_dft); + + // We don't support real-to-real transform + CV_Assert(is_complex_input || is_complex_output); + + GpuMat src_data, src_aux; + + // Make sure here we work with the continuous input, + // as CUFFT can't handle gaps + if (src.isContinuous()) + src_aux = src; + else + { + src_data = GpuMat(1, src.size().area(), src.type()); + src_aux = GpuMat(src.rows, src.cols, src.type(), src_data.ptr(), src.cols * src.elemSize()); + src.copyTo(src_aux); + + if (is_1d_input && !is_row_dft) + { + // If the source matrix is the single column + // reshape it into single row + int rows = std::min(src.rows, src.cols); + int cols = src.size().area() / rows; + src_aux = GpuMat(rows, cols, src.type(), src_data.ptr(), cols * src.elemSize()); + } + } + + cufftType dft_type = CUFFT_R2C; + if (is_complex_input) + dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R; + + int dft_cols = src_aux.cols; + if (is_complex_input && !is_complex_output) + dft_cols = (src_aux.cols - 1) * 2 + (int)odd; + CV_Assert(dft_cols > 1); + + cufftHandle plan; + if (is_1d_input || is_row_dft) + cufftPlan1d(&plan, dft_cols, dft_type, src_aux.rows); + else + cufftPlan2d(&plan, src_aux.rows, dft_cols, dft_type); + + GpuMat dst_data, dst_aux; + int dst_cols, dst_rows; + bool is_dst_mem_good; + + if (is_complex_input) + { + if (is_complex_output) + { + is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2 + && dst.size().area() >= src.size().area(); + + if (is_dst_mem_good) + dst_data = dst; + else + { + dst_data.create(1, src.size().area(), CV_32FC2); + dst_aux = GpuMat(src.rows, src.cols, dst_data.type(), dst_data.ptr(), + src.cols * dst_data.elemSize()); + } + + cufftSafeCall(cufftExecC2C( + plan, src_data.ptr(), + dst_data.ptr(), + is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD)); + + if (!is_dst_mem_good) + { + dst.create(dst_aux.size(), dst_aux.type()); + dst_aux.copyTo(dst); + } + } + else + { + dst_rows = src.rows; + dst_cols = (src.cols - 1) * 2 + (int)odd; + if (src_aux.size() != src.size()) + { + dst_rows = (src.rows - 1) * 2 + (int)odd; + dst_cols = src.cols; + } + + is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32F + && dst.rows >= dst_rows && dst.cols >= dst_cols; + + if (is_dst_mem_good) + dst_data = dst; + else + { + dst_data.create(1, dst_rows * dst_cols, CV_32F); + dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(), + dst_cols * dst_data.elemSize()); + } + + cufftSafeCall(cufftExecC2R( + plan, src_data.ptr(), dst_data.ptr())); + + if (!is_dst_mem_good) + { + dst.create(dst_aux.size(), dst_aux.type()); + dst_aux.copyTo(dst); + } + } + } + else + { + dst_rows = src.rows; + dst_cols = src.cols / 2 + 1; + if (src_aux.size() != src.size()) + { + dst_rows = src.rows / 2 + 1; + dst_cols = src.cols; + } + + is_dst_mem_good = dst.isContinuous() && dst.type() == CV_32FC2 + && dst.rows >= dst_rows && dst.cols >= dst_cols; + + if (is_dst_mem_good) + dst_data = dst; + else + { + dst_data.create(1, dst_rows * dst_cols, CV_32FC2); + dst_aux = GpuMat(dst_rows, dst_cols, dst_data.type(), dst_data.ptr(), + dst_cols * dst_data.elemSize()); + } + + cufftSafeCall(cufftExecR2C( + plan, src_data.ptr(), dst_data.ptr())); + + if (!is_dst_mem_good) + { + dst.create(dst_aux.size(), dst_aux.type()); + dst_aux.copyTo(dst); + } + } + + cufftSafeCall(cufftDestroy(plan)); +} + ////////////////////////////////////////////////////////////////////////////// // crossCorr diff --git a/tests/gpu/src/dft_routines.cpp b/tests/gpu/src/dft_routines.cpp index b97df2f2dc..41ffedcdbf 100644 --- a/tests/gpu/src/dft_routines.cpp +++ b/tests/gpu/src/dft_routines.cpp @@ -53,13 +53,18 @@ struct CV_GpuMulSpectrumsTest: CvTest { try { - if (!test(1 + rand() % 100, 1 + rand() % 1000)) return; - if (!testConj(1 + rand() % 100, 1 + rand() % 1000)) return; - if (!testScaled(1 + rand() % 100, 1 + rand() % 1000)) return; - if (!testScaledConj(1 + rand() % 100, 1 + rand() % 1000)) return; + test(0); + testConj(0); + testScaled(0); + testScaledConj(0); + test(DFT_ROWS); + testConj(DFT_ROWS); + testScaled(DFT_ROWS); + testScaledConj(DFT_ROWS); } catch (const Exception& e) { + ts->printf(CvTS::CONSOLE, e.what()); if (!check_and_treat_gpu_exception(e, ts)) throw; return; } @@ -134,69 +139,225 @@ struct CV_GpuMulSpectrumsTest: CvTest return true; } - bool test(int cols, int rows) + void test(int flags) { + int cols = 1 + rand() % 100, rows = 1 + rand() % 1000; + Mat a, b; gen(cols, rows, a); gen(cols, rows, b); Mat c_gold; - mulSpectrums(a, b, c_gold, 0, false); + mulSpectrums(a, b, c_gold, flags, false); GpuMat d_c; - mulSpectrums(GpuMat(a), GpuMat(b), d_c, 0, false); + mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, false); - return cmp(c_gold, Mat(d_c)) - || (ts->printf(CvTS::CONSOLE, "test failed: cols=%d, rows=%d\n", cols, rows), false); + if (!cmp(c_gold, Mat(d_c))) + ts->printf(CvTS::CONSOLE, "test failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags); } - bool testConj(int cols, int rows) + void testConj(int flags) { + int cols = 1 + rand() % 100, rows = 1 + rand() % 1000; + Mat a, b; gen(cols, rows, a); gen(cols, rows, b); Mat c_gold; - mulSpectrums(a, b, c_gold, 0, true); + mulSpectrums(a, b, c_gold, flags, true); GpuMat d_c; - mulSpectrums(GpuMat(a), GpuMat(b), d_c, 0, true); + mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, true); - return cmp(c_gold, Mat(d_c)) - || (ts->printf(CvTS::CONSOLE, "testConj failed: cols=%d, rows=%d\n", cols, rows), false); + if (!cmp(c_gold, Mat(d_c))) + ts->printf(CvTS::CONSOLE, "testConj failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags); } - bool testScaled(int cols, int rows) + void testScaled(int flags) { + int cols = 1 + rand() % 100, rows = 1 + rand() % 1000; + Mat a, b; gen(cols, rows, a); gen(cols, rows, b); float scale = 1.f / a.size().area(); Mat c_gold; - mulSpectrums(a, b, c_gold, 0, false); + mulSpectrums(a, b, c_gold, flags, false); GpuMat d_c; - mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, 0, scale, false); + mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, false); - return cmpScaled(c_gold, Mat(d_c), scale) - || (ts->printf(CvTS::CONSOLE, "testScaled failed: cols=%d, rows=%d\n", cols, rows), false); + if (!cmpScaled(c_gold, Mat(d_c), scale)) + ts->printf(CvTS::CONSOLE, "testScaled failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags); } - bool testScaledConj(int cols, int rows) + void testScaledConj(int flags) { + int cols = 1 + rand() % 100, rows = 1 + rand() % 1000; + Mat a, b; gen(cols, rows, a); gen(cols, rows, b); float scale = 1.f / a.size().area(); Mat c_gold; - mulSpectrums(a, b, c_gold, 0, true); + mulSpectrums(a, b, c_gold, flags, true); GpuMat d_c; - mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, 0, scale, true); + mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, true); + + if (!cmpScaled(c_gold, Mat(d_c), scale)) + ts->printf(CvTS::CONSOLE, "testScaledConj failed: cols=%d, rows=%d, flags=%D\n", cols, rows, flags); + } +} CV_GpuMulSpectrumsTest_inst; + + +struct CV_GpuDftTest: CvTest +{ + CV_GpuDftTest(): CvTest("GPU-DftTest", "dft") {} + + void run(int) + { + try + { + int cols = 1 + rand() % 100, rows = 1 + rand() % 100; + + testC2C(cols, rows, 0, "no flags"); + testC2C(cols, rows + 1, 0, "no flags 0 1"); + testC2C(cols, rows + 1, 0, "no flags 1 0"); + testC2C(cols + 1, rows, 0, "no flags 1 1"); + testC2C(cols, rows, DFT_INVERSE, "DFT_INVERSE"); + testC2C(cols, rows, DFT_ROWS, "DFT_ROWS"); + testC2C(1, rows, 0, "single col"); + testC2C(cols, 1, 0, "single row"); + testC2C(1, rows, DFT_INVERSE, "single col inversed"); + testC2C(cols, 1, DFT_INVERSE, "single row inversed"); + testC2C(cols, 1, DFT_ROWS, "single row DFT_ROWS"); + testC2C(1, 2, 0, "size 1 2"); + testC2C(2, 1, 0, "size 2 1"); + + testR2CThenC2R(cols, rows, "sanity"); + testR2CThenC2R(cols, rows + 1, "sanity 0 1"); + testR2CThenC2R(cols + 1, rows, "sanity 1 0"); + testR2CThenC2R(cols + 1, rows + 1, "sanity 1 1"); + testR2CThenC2R(1, rows, "single col"); + testR2CThenC2R(1, rows + 1, "single col 1"); + testR2CThenC2R(cols, 1, "single row" );; + testR2CThenC2R(cols + 1, 1, "single row 1" );; + } + catch (const Exception& e) + { + ts->printf(CvTS::CONSOLE, e.what()); + if (!check_and_treat_gpu_exception(e, ts)) throw; + return; + } + } + + void gen(int cols, int rows, int cn, Mat& mat) + { + RNG rng; + mat.create(rows, cols, CV_MAKETYPE(CV_32F, cn)); + rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f)); + } - return cmpScaled(c_gold, Mat(d_c), scale) - || (ts->printf(CvTS::CONSOLE, "testScaledConj failed: cols=%d, rows=%d\n", cols, rows), false); + bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f, float scale=1.f) + { + if (gold.size() != mine.size()) + { + ts->printf(CvTS::CONSOLE, "bad sizes: gold: %d %d, mine: %d %d\n", gold.cols, gold.rows, mine.cols, mine.rows); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return false; + } + if (gold.depth() != mine.depth()) + { + ts->printf(CvTS::CONSOLE, "bad depth: gold=%d, mine=%d\n", gold.depth(), mine.depth()); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return false; + } + if (gold.channels() != mine.channels()) + { + ts->printf(CvTS::CONSOLE, "bad channel count: gold=%d, mine=%d\n", gold.channels(), mine.channels()); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return false; + } + for (int i = 0; i < gold.rows; ++i) + { + for (int j = 0; j < gold.cols * gold.channels(); ++j) + { + float gold_ = gold.at(i, j); + float mine_ = mine.at(i, j) * scale; + if (fabs(gold_ - mine_) > max_err) + { + ts->printf(CvTS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + return false; + } + } + } + return true; + } + + void testC2C(int cols, int rows, int flags, const std::string& hint) + { + Mat a; + gen(cols, rows, 2, a); + + Mat b_gold; + dft(a, b_gold, flags); + + GpuMat d_b; + dft(GpuMat(a), d_b, flags); + + bool ok = true; + if (ok && d_b.depth() != CV_32F) + { + ts->printf(CvTS::CONSOLE, "bad depth: %d\n", d_b.depth()); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + ok = false; + } + if (ok && d_b.channels() != 2) + { + ts->printf(CvTS::CONSOLE, "bad channel count: %d\n", d_b.channels()); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + ok = false; + } + if (ok) ok = cmp(b_gold, Mat(d_b), rows * cols * 1e-5f); + if (!ok) + ts->printf(CvTS::CONSOLE, "testC2C failed: hint=%s, cols=%d, rows=%d, flags=%d\n", hint.c_str(), cols, rows, flags); + } + + void testR2CThenC2R(int cols, int rows, const std::string& hint) + { + Mat a; + gen(cols, rows, 1, a); + + bool odd = false; + if (a.cols == 1) odd = a.rows % 2 == 1; + else odd = a.cols % 2 == 1; + bool ok = true; + + GpuMat d_b; + GpuMat d_c; + dft(GpuMat(a), d_b, 0); + dft(d_b, d_c, DFT_REAL_OUTPUT, 0, odd); + + if (ok && d_c.depth() != CV_32F) + { + ts->printf(CvTS::CONSOLE, "bad depth: %d\n", d_c.depth()); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + ok = false; + } + if (ok && d_c.channels() != 1) + { + ts->printf(CvTS::CONSOLE, "bad channel count: %d\n", d_c.channels()); + ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); + ok = false; + } + if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f, 1.f / (rows * cols)); + if (!ok) + ts->printf(CvTS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d\n", hint.c_str(), cols, rows); } -} CV_GpuMulSpectrumsTest_inst; \ No newline at end of file +} CV_GpuDftTest_inst; \ No newline at end of file