From 6702d557111bacd4f67a0d99a72275b5a5593f33 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Fri, 24 Dec 2010 06:48:23 +0000 Subject: [PATCH] added support of scaling into gpu::dft, refactored gpu::convolve --- modules/gpu/src/cuda/imgproc.cu | 8 -------- modules/gpu/src/imgproc_gpu.cpp | 26 +++++++++++++++----------- tests/gpu/src/dft_routines.cpp | 8 ++++---- 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index 7b66565bcd..7d3ff893b4 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -752,7 +752,6 @@ namespace cv { namespace gpu { namespace imgproc ////////////////////////////////////////////////////////////////////////// // mulSpectrums - __global__ void mulSpectrumsKernel(const PtrStep_ a, const PtrStep_ b, DevMem2D_ c) { @@ -776,11 +775,9 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall(cudaThreadSynchronize()); } - ////////////////////////////////////////////////////////////////////////// // mulSpectrums_CONJ - __global__ void mulSpectrumsKernel_CONJ( const PtrStep_ a, const PtrStep_ b, DevMem2D_ c) @@ -805,11 +802,9 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall(cudaThreadSynchronize()); } - ////////////////////////////////////////////////////////////////////////// // mulAndScaleSpectrums - __global__ void mulAndScaleSpectrumsKernel( const PtrStep_ a, const PtrStep_ b, float scale, DevMem2D_ c) @@ -835,11 +830,9 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall(cudaThreadSynchronize()); } - ////////////////////////////////////////////////////////////////////////// // mulAndScaleSpectrums_CONJ - __global__ void mulAndScaleSpectrumsKernel_CONJ( const PtrStep_ a, const PtrStep_ b, float scale, DevMem2D_ c) @@ -865,6 +858,5 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall(cudaThreadSynchronize()); } - }}} diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 8a94630a43..76e079a0af 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -1144,9 +1144,6 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo bool is_complex_input = src.channels() == 2; bool is_complex_output = !(flags & DFT_REAL_OUTPUT); - // We don't support scaled transform - CV_Assert(!is_scaled_dft); - // We don't support real-to-real transform CV_Assert(is_complex_input || is_complex_output); @@ -1178,6 +1175,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo if (is_complex_input) dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R; + int dft_rows = src_aux.rows; int dft_cols = src_aux.cols; if (is_complex_input && !is_complex_output) dft_cols = (src_aux.cols - 1) * 2 + (int)odd; @@ -1185,9 +1183,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo cufftHandle plan; if (is_1d_input || is_row_dft) - cufftPlan1d(&plan, dft_cols, dft_type, src_aux.rows); + cufftPlan1d(&plan, dft_cols, dft_type, dft_rows); else - cufftPlan2d(&plan, src_aux.rows, dft_cols, dft_type); + cufftPlan2d(&plan, dft_rows, dft_cols, dft_type); GpuMat dst_data, dst_aux; int dst_cols, dst_rows; @@ -1285,6 +1283,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo } cufftSafeCall(cufftDestroy(plan)); + + if (is_scaled_dft) + multiply(dst, Scalar::all(1. / (dft_rows * dft_cols)), dst); } ////////////////////////////////////////////////////////////////////////////// @@ -1293,7 +1294,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo namespace { // Estimates optimal block size - void crossCorrOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh) + void convolveOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh) { int major, minor; getComputeCapability(getDevice(), major, minor); @@ -1329,7 +1330,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); Size block_size; - crossCorrOptBlockSize(result.cols, result.rows, templ.cols, templ.rows, + convolveOptBlockSize(result.cols, result.rows, templ.cols, templ.rows, block_size.width, block_size.height); Size dft_size; @@ -1367,10 +1368,11 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, { for (int x = 0; x < result.cols; x += block_size.width) { - // Locate ROI in the source matrix Size image_roi_size; image_roi_size.width = std::min(x + dft_size.width, image.cols) - x; image_roi_size.height = std::min(y + dft_size.height, image.rows) - y; + + // Locate ROI in the source matrix GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr(y) + x), image.step); // Make source image block continous @@ -1386,14 +1388,16 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr(), result_data.ptr())); - // Copy result block into appropriate part of the result matrix. - // We can't compute it inplace as the result of the CUFFT transforms - // is always continous, while the result matrix and its blocks can have gaps. Size result_roi_size; result_roi_size.width = std::min(x + block_size.width, result.cols) - x; result_roi_size.height = std::min(y + block_size.height, result.rows) - y; + GpuMat result_roi(result_roi_size, CV_32F, (void*)(result.ptr(y) + x), result.step); GpuMat result_block(result_roi_size, CV_32F, result_data.ptr(), dft_size.width * sizeof(cufftReal)); + + // Copy result block into appropriate part of the result matrix. + // We can't compute it inplace as the result of the CUFFT transforms + // is always continous, while the result matrix and its blocks can have gaps. result_block.copyTo(result_roi); } } diff --git a/tests/gpu/src/dft_routines.cpp b/tests/gpu/src/dft_routines.cpp index 6c8d5c7dc2..0bda401c95 100644 --- a/tests/gpu/src/dft_routines.cpp +++ b/tests/gpu/src/dft_routines.cpp @@ -274,7 +274,7 @@ struct CV_GpuDftTest: CvTest rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f)); } - bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f, float scale=1.f) + bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f) { if (gold.size() != mine.size()) { @@ -299,7 +299,7 @@ struct CV_GpuDftTest: CvTest for (int j = 0; j < gold.cols * gold.channels(); ++j) { float gold_ = gold.at(i, j); - float mine_ = mine.at(i, j) * scale; + float mine_ = mine.at(i, j); if (fabs(gold_ - mine_) > max_err) { ts->printf(CvTS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_); @@ -382,7 +382,7 @@ struct CV_GpuDftTest: CvTest d_c = GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize()); } dft(GpuMat(a), d_b, 0); - dft(d_b, d_c, DFT_REAL_OUTPUT, 0, odd); + dft(d_b, d_c, DFT_REAL_OUTPUT | DFT_SCALE, 0, odd); if (ok && inplace && d_b.ptr() != d_b_data.ptr()) { @@ -408,7 +408,7 @@ struct CV_GpuDftTest: CvTest ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT); ok = false; } - if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f, 1.f / (rows * cols)); + if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f); if (!ok) ts->printf(CvTS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d\n", hint.c_str(), cols, rows); }