From 8a799aa89a81ec75b5261d6abc180f4c56393052 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy <no@email> Date: Mon, 3 Oct 2011 14:05:52 +0000 Subject: [PATCH] Updated optimal block size estimation for the convolve() function --- modules/gpu/perf/perf_imgproc.cpp | 12 +++++++----- modules/gpu/perf/perf_utility.hpp | 1 + modules/gpu/src/imgproc.cpp | 11 ++++++++--- samples/gpu/performance/performance.cpp | 21 ++++++++++++++------- samples/gpu/performance/performance.h | 8 +++++++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index f239edb725..81dd559543 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()), SANITY_CHECK(dst_host); } -PERF_TEST_P(DevInfo_Size, convolve, testing::Combine(testing::ValuesIn(devices()), - testing::Values(GPU_TYPICAL_MAT_SIZES))) +PERF_TEST_P(DevInfo_Int_Int, convolve, testing::Combine(testing::ValuesIn(devices()), + testing::Values(512, 1024, 1536, 2048, 2560, 3072, 3584), + testing::Values(27, 32, 64))) { DeviceInfo devInfo = std::tr1::get<0>(GetParam()); - Size size = std::tr1::get<1>(GetParam()); + int image_size = std::tr1::get<1>(GetParam()); + int templ_size = std::tr1::get<2>(GetParam()); setDevice(devInfo.deviceID()); - Mat image_host(size, CV_32FC1); - Mat templ_host(size, CV_32FC1); + Mat image_host(image_size, image_size, CV_32FC1); + Mat templ_host(templ_size, templ_size, CV_32FC1); declare.in(image_host, templ_host, WARMUP_RNG); diff --git a/modules/gpu/perf/perf_utility.hpp b/modules/gpu/perf/perf_utility.hpp index a57e3671b2..17f9418136 100644 --- a/modules/gpu/perf/perf_utility.hpp +++ b/modules/gpu/perf/perf_utility.hpp @@ -32,6 +32,7 @@ struct CvtColorInfo typedef TestBaseWithParam<DeviceInfo> DevInfo; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size; +typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_Int_Int; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType; typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType; diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp index 8b86ce61f1..47b09986a4 100644 --- a/modules/gpu/src/imgproc.cpp +++ b/modules/gpu/src/imgproc.cpp @@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size) Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size) { int scale = 40; - Size bsize_min(1024, 1024); + Size bsize_min(512, 512); // Check whether we use Fermi generation or newer GPU if (DeviceInfo().majorVersion() >= 2) { - bsize_min.width = 2048; - bsize_min.height = 2048; + bsize_min.width = 1024; + bsize_min.height = 1024; } Size bsize(std::max(templ_size.width * scale, bsize_min.width), std::max(templ_size.height * scale, bsize_min.height)); + int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width; + int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height; + bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row; + bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col; + bsize.width = std::min(bsize.width, result_size.width); bsize.height = std::min(bsize.height, result_size.height); return bsize; diff --git a/samples/gpu/performance/performance.cpp b/samples/gpu/performance/performance.cpp index b9bbc85c11..6b1619e84b 100644 --- a/samples/gpu/performance/performance.cpp +++ b/samples/gpu/performance/performance.cpp @@ -8,9 +8,15 @@ using namespace cv; void TestSystem::run() { - // Run test initializers - vector<Runnable*>::iterator it = inits_.begin(); - for (; it != inits_.end(); ++it) + if (is_list_mode_) + { + for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it) + cout << (*it)->name() << endl; + return; + } + + // Run test initializers + for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it) { if ((*it)->name().find(test_filter_, 0) != string::npos) (*it)->run(); @@ -19,8 +25,7 @@ void TestSystem::run() printHeading(); // Run tests - it = tests_.begin(); - for (; it != tests_.end(); ++it) + for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it) { try { @@ -145,13 +150,15 @@ int main(int argc, char** argv) string key = argv[i]; if (key == "--help") { - cout << "Usage: performance_gpu [--filter <test_filter>] [--working-dir <working_dir_with_slash>]\n"; + cout << "Usage: performance_gpu [--ls] [--filter <test_filter>] [--workdir <working_dir_with_slash>]\n"; return 0; } if (key == "--filter" && i + 1 < argc) TestSystem::instance().setTestFilter(argv[++i]); - else if (key == "--working-dir" && i + 1 < argc) + else if (key == "--workdir" && i + 1 < argc) TestSystem::instance().setWorkingDir(argv[++i]); + else if (key == "--ls") + TestSystem::instance().setListMode(true); else { cout << "Unknown parameter: '" << key << "'" << endl; diff --git a/samples/gpu/performance/performance.h b/samples/gpu/performance/performance.h index 0031950475..007309bcf0 100644 --- a/samples/gpu/performance/performance.h +++ b/samples/gpu/performance/performance.h @@ -68,10 +68,14 @@ public: cur_subtest_is_empty_ = false; } + bool isListMode() const { return is_list_mode_; } + void setListMode(bool value) { is_list_mode_ = value; } + private: TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0), gpu_elapsed_(0), speedup_total_(0.0), - num_subtests_called_(0) {} + num_subtests_called_(0), + is_list_mode_(false) {} void finishCurrentSubtest(); void resetCurrentSubtest() @@ -100,6 +104,8 @@ private: double speedup_total_; int num_subtests_called_; + + bool is_list_mode_; };