From 936236e4b1b190d7bc33a33df982fac8ab6cfc76 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 11 Jun 2013 16:06:51 +0400 Subject: [PATCH] Extended the CPU/GPU selection mechanism in performance tests. Now it allows choosing between arbitrary implementation variants. --- modules/gpu/perf/perf_main.cpp | 2 +- modules/nonfree/perf/perf_main.cpp | 2 +- modules/superres/perf/perf_main.cpp | 2 +- modules/ts/include/opencv2/ts/ts_perf.hpp | 20 +++-- modules/ts/src/ts_perf.cpp | 101 ++++++++++++++-------- 5 files changed, 81 insertions(+), 46 deletions(-) diff --git a/modules/gpu/perf/perf_main.cpp b/modules/gpu/perf/perf_main.cpp index a7ac1ccce8..f9f3a68547 100644 --- a/modules/gpu/perf/perf_main.cpp +++ b/modules/gpu/perf/perf_main.cpp @@ -44,4 +44,4 @@ using namespace perf; -CV_PERF_TEST_MAIN(gpu, printCudaInfo()) +CV_PERF_TEST_MAIN_WITH_IMPLS(gpu, ("cuda", "plain"), printCudaInfo()) diff --git a/modules/nonfree/perf/perf_main.cpp b/modules/nonfree/perf/perf_main.cpp index de1242149e..373e08aedb 100644 --- a/modules/nonfree/perf/perf_main.cpp +++ b/modules/nonfree/perf/perf_main.cpp @@ -1,4 +1,4 @@ #include "perf_precomp.hpp" #include "opencv2/ts/gpu_perf.hpp" -CV_PERF_TEST_MAIN(nonfree, perf::printCudaInfo()) +CV_PERF_TEST_MAIN_WITH_IMPLS(nonfree, ("cuda", "plain"), perf::printCudaInfo()) diff --git a/modules/superres/perf/perf_main.cpp b/modules/superres/perf/perf_main.cpp index adc69e6e8b..90a7f51251 100644 --- a/modules/superres/perf/perf_main.cpp +++ b/modules/superres/perf/perf_main.cpp @@ -44,4 +44,4 @@ using namespace perf; -CV_PERF_TEST_MAIN(superres, printCudaInfo()) +CV_PERF_TEST_MAIN_WITH_IMPLS(superres, ("cuda", "plain"), printCudaInfo()) diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index fe57655157..eb5e3e554e 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -210,18 +210,13 @@ private: #define SANITY_CHECK_KEYPOINTS(array, ...) ::perf::Regression::addKeypoints(this, #array, array , ## __VA_ARGS__) #define SANITY_CHECK_MATCHES(array, ...) ::perf::Regression::addMatches(this, #array, array , ## __VA_ARGS__) -#ifdef HAVE_CUDA class CV_EXPORTS GpuPerf { public: static bool targetDevice(); }; -# define PERF_RUN_GPU() ::perf::GpuPerf::targetDevice() -#else -# define PERF_RUN_GPU() false -#endif - +#define PERF_RUN_GPU() ::perf::GpuPerf::targetDevice() /*****************************************************************************************\ * Container for performance metrics * @@ -263,7 +258,10 @@ public: TestBase(); static void Init(int argc, const char* const argv[]); + static void Init(const std::vector & availableImpls, + int argc, const char* const argv[]); static std::string getDataPath(const std::string& relativePath); + static std::string getSelectedImpl(); protected: virtual void PerfTestBody() = 0; @@ -476,18 +474,24 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ void fixture##_##name::PerfTestBody() +#define CV_PERF_UNWRAP_IMPLS(...) __VA_ARGS__ -#define CV_PERF_TEST_MAIN(testsuitname, ...) \ +// "plain" should always be one of the implementations +#define CV_PERF_TEST_MAIN_WITH_IMPLS(testsuitname, impls, ...) \ int main(int argc, char **argv)\ {\ while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/\ + std::string impls_[] = { CV_PERF_UNWRAP_IMPLS impls };\ ::perf::Regression::Init(#testsuitname);\ - ::perf::TestBase::Init(argc, argv);\ + ::perf::TestBase::Init(std::vector(impls_, impls_ + sizeof impls_ / sizeof *impls_),\ + argc, argv);\ ::testing::InitGoogleTest(&argc, argv);\ cvtest::printVersionInfo();\ return RUN_ALL_TESTS();\ } +#define CV_PERF_TEST_MAIN(testsuitname, ...) CV_PERF_TEST_MAIN_WITH_IMPLS(testsuitname, ("plain"), __VA_ARGS__) + #define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer()) #define TEST_CYCLE() for(; startTimer(), next(); stopTimer()) #define TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ++r) diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index c375e7c388..3b73ddcf73 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -14,30 +14,10 @@ int64 TestBase::timeLimitDefault = 0; unsigned int TestBase::iterationsLimitDefault = (unsigned int)(-1); int64 TestBase::_timeadjustment = 0; -const std::string command_line_keys = - "{ |perf_max_outliers |8 |percent of allowed outliers}" - "{ |perf_min_samples |10 |minimal required numer of samples}" - "{ |perf_force_samples |100 |force set maximum number of samples for all tests}" - "{ |perf_seed |809564 |seed for random numbers generator}" - "{ |perf_threads |-1 |the number of worker threads, if parallel execution is enabled}" - "{ |perf_write_sanity |false |create new records for sanity checks}" - "{ |perf_verify_sanity |false |fail tests having no regression data for sanity checks}" -#ifdef ANDROID - "{ |perf_time_limit |6.0 |default time limit for a single test (in seconds)}" - "{ |perf_affinity_mask |0 |set affinity mask for the main thread}" - "{ |perf_log_power_checkpoints | |additional xml logging for power measurement}" -#else - "{ |perf_time_limit |3.0 |default time limit for a single test (in seconds)}" -#endif - "{ |perf_max_deviation |1.0 |}" - "{h |help |false |print help info}" -#ifdef HAVE_CUDA - "{ |perf_run_cpu |false |run GPU performance tests for analogical CPU functions}" - "{ |perf_cuda_device |0 |run GPU test suite onto specific CUDA capable device}" - "{ |perf_cuda_info_only |false |print an information about system and an available CUDA devices and then exit.}" -#endif -; +// Item [0] will be considered the default implementation. +static std::vector available_impls; +static std::string param_impl; static double param_max_outliers; static double param_max_deviation; static unsigned int param_min_samples; @@ -48,7 +28,6 @@ static int param_threads; static bool param_write_sanity; static bool param_verify_sanity; #ifdef HAVE_CUDA -static bool param_run_cpu; static int param_cuda_device; #endif @@ -577,11 +556,12 @@ Regression& Regression::operator() (const std::string& name, cv::InputArray arra std::string nodename = getCurrentTestNodeName(); -#ifdef HAVE_CUDA - static const std::string prefix = (param_run_cpu)? "CPU_" : "GPU_"; + // This is a hack for compatibility and it should eventually get removed. + // gpu's tests don't even have CPU sanity data anymore. if(suiteName == "gpu") - nodename = prefix + nodename; -#endif + { + nodename = (PERF_RUN_GPU() ? "GPU_" : "CPU_") + nodename; + } cv::FileNode n = rootIn[nodename]; if(n.isNone()) @@ -646,6 +626,42 @@ performance_metrics::performance_metrics() void TestBase::Init(int argc, const char* const argv[]) { + std::vector plain_only; + plain_only.push_back("plain"); + TestBase::Init(plain_only, argc, argv); +} + +void TestBase::Init(const std::vector & availableImpls, + int argc, const char* const argv[]) +{ + available_impls = availableImpls; + + const std::string command_line_keys = + "{ |perf_max_outliers |8 |percent of allowed outliers}" + "{ |perf_min_samples |10 |minimal required numer of samples}" + "{ |perf_force_samples |100 |force set maximum number of samples for all tests}" + "{ |perf_seed |809564 |seed for random numbers generator}" + "{ |perf_threads |-1 |the number of worker threads, if parallel execution is enabled}" + "{ |perf_write_sanity |false |create new records for sanity checks}" + "{ |perf_verify_sanity |false |fail tests having no regression data for sanity checks}" + "{ |perf_impl |" + available_impls[0] + + "|the implementation variant of functions under test}" + "{ |perf_run_cpu |false |deprecated, equivalent to --perf_impl=plain}" +#ifdef ANDROID + "{ |perf_time_limit |6.0 |default time limit for a single test (in seconds)}" + "{ |perf_affinity_mask |0 |set affinity mask for the main thread}" + "{ |perf_log_power_checkpoints | |additional xml logging for power measurement}" +#else + "{ |perf_time_limit |3.0 |default time limit for a single test (in seconds)}" +#endif + "{ |perf_max_deviation |1.0 |}" + "{h |help |false |print help info}" +#ifdef HAVE_CUDA + "{ |perf_cuda_device |0 |run GPU test suite onto specific CUDA capable device}" + "{ |perf_cuda_info_only |false |print an information about system and an available CUDA devices and then exit.}" +#endif + ; + cv::CommandLineParser args(argc, argv, command_line_keys.c_str()); if (args.get("help")) { @@ -656,6 +672,7 @@ void TestBase::Init(int argc, const char* const argv[]) ::testing::AddGlobalTestEnvironment(new PerfEnvironment); + param_impl = args.get("perf_run_cpu") ? "plain" : args.get("perf_impl"); param_max_outliers = std::min(100., std::max(0., args.get("perf_max_outliers"))); param_min_samples = std::max(1u, args.get("perf_min_samples")); param_max_deviation = std::max(0., args.get("perf_max_deviation")); @@ -670,19 +687,28 @@ void TestBase::Init(int argc, const char* const argv[]) log_power_checkpoints = args.get("perf_log_power_checkpoints"); #endif + if (std::find(available_impls.begin(), available_impls.end(), param_impl) == available_impls.end()) + { + printf("No such implementation: %s\n", param_impl.c_str()); + exit(1); + } + #ifdef HAVE_CUDA bool printOnly = args.get("perf_cuda_info_only"); if (printOnly) exit(0); +#endif + + if (available_impls.size() > 1) + printf("[----------]\n[ INFO ] \tImplementation variant: %s.\n[----------]\n", param_impl.c_str()), fflush(stdout); + +#ifdef HAVE_CUDA - param_run_cpu = args.get("perf_run_cpu"); param_cuda_device = std::max(0, std::min(cv::gpu::getCudaEnabledDeviceCount(), args.get("perf_cuda_device"))); - if (param_run_cpu) - printf("[----------]\n[ GPU INFO ] \tRun test suite on CPU.\n[----------]\n"), fflush(stdout); - else + if (param_impl == "cuda") { cv::gpu::DeviceInfo info(param_cuda_device); if (!info.isCompatible()) @@ -708,6 +734,13 @@ void TestBase::Init(int argc, const char* const argv[]) _timeadjustment = _calibrate(); } + +std::string TestBase::getSelectedImpl() +{ + return param_impl; +} + + int64 TestBase::_calibrate() { class _helper : public ::perf::TestBase @@ -1325,12 +1358,10 @@ void perf::sort(std::vector& pts, cv::InputOutputArray descriptors /*****************************************************************************************\ * ::perf::GpuPerf \*****************************************************************************************/ -#ifdef HAVE_CUDA bool perf::GpuPerf::targetDevice() { - return !param_run_cpu; + return param_impl == "cuda"; } -#endif /*****************************************************************************************\ * ::perf::PrintTo