From be22891f7112d87e70f20a8c3d8396afeb3737d5 Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Tue, 25 Dec 2012 19:53:58 +0400 Subject: [PATCH 1/3] added GPU_PERF_TEST_P but haven't switched test to it (only perf4au app) --- modules/gpu/app/nv_perf_test/main.cpp | 61 +++++++++++------------ modules/gpu/src/cuda/optflowbm.cu | 10 ++-- modules/ts/include/opencv2/ts/ts_perf.hpp | 15 ++++++ 3 files changed, 50 insertions(+), 36 deletions(-) diff --git a/modules/gpu/app/nv_perf_test/main.cpp b/modules/gpu/app/nv_perf_test/main.cpp index 928b30a19e..04b4f6f815 100644 --- a/modules/gpu/app/nv_perf_test/main.cpp +++ b/modules/gpu/app/nv_perf_test/main.cpp @@ -75,8 +75,7 @@ int main(int argc, char* argv[]) DEF_PARAM_TEST_1(Image, std::string); -PERF_TEST_P(Image, HoughLinesP, - testing::Values(std::string("im1_1280x800.jpg"))) +GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg"))) { declare.time(30.0); @@ -125,8 +124,8 @@ PERF_TEST_P(Image, HoughLinesP, DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth); -PERF_TEST_P(Image_Depth, GoodFeaturesToTrack, - testing::Combine( +GPU_PERF_TEST_P(Image_Depth, GoodFeaturesToTrack, + testing::Combine( testing::Values(std::string("im1_1280x800.jpg")), testing::Values(CV_8U, CV_16U) )) @@ -193,12 +192,12 @@ typedef std::pair string_pair; DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool); -PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(CV_8U, CV_16U), - testing::Bool() - )) +GPU_PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(CV_8U, CV_16U), + testing::Bool() + )) { declare.time(60); @@ -287,11 +286,11 @@ PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, DEF_PARAM_TEST(ImagePair_Depth, string_pair, 
perf::MatDepth); -PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(CV_8U, CV_16U) - )) +GPU_PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(CV_8U, CV_16U) + )) { declare.time(500); @@ -384,15 +383,15 @@ void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr, DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size); -PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(cv::Size(16, 16)), - testing::Values(cv::Size(2, 2)), - testing::Values(cv::Size(16, 16)) - )) +GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(cv::Size(16, 16)), + testing::Values(cv::Size(2, 2)), + testing::Values(cv::Size(16, 16)) + )) { - declare.time(1000); + declare.time(3000); const string_pair fileNames = std::tr1::get<0>(GetParam()); const cv::Size block_size = std::tr1::get<1>(GetParam()); @@ -435,15 +434,15 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, SANITY_CHECK(0); } -PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(cv::Size(16, 16)), - testing::Values(cv::Size(1, 1)), - testing::Values(cv::Size(16, 16)) - )) +GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(cv::Size(16, 16)), + testing::Values(cv::Size(1, 1)), + testing::Values(cv::Size(16, 16)) + )) { - declare.time(1000); + 
declare.time(3000); const string_pair fileNames = std::tr1::get<0>(GetParam()); const cv::Size block_size = std::tr1::get<1>(GetParam()); diff --git a/modules/gpu/src/cuda/optflowbm.cu b/modules/gpu/src/cuda/optflowbm.cu index f9090abdc0..baf8dfb362 100644 --- a/modules/gpu/src/cuda/optflowbm.cu +++ b/modules/gpu/src/cuda/optflowbm.cu @@ -210,7 +210,7 @@ namespace optflowbm_fast { } - __device__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const { for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) { @@ -246,7 +246,7 @@ namespace optflowbm_fast } } - __device__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const { for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) { @@ -271,7 +271,7 @@ namespace optflowbm_fast } } - __device__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + __device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const { int ay = i; int ax = j + block_radius; @@ -298,7 +298,7 @@ namespace optflowbm_fast } } - __device__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const + __device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const { int bestDist = numeric_limits::max(); int bestInd = -1; @@ -328,7 +328,7 @@ namespace optflowbm_fast } } - __device__ void operator()(PtrStepf velx, PtrStepf vely) const + __device__ __forceinline__ void operator()(PtrStepf velx, 
PtrStepf vely) const { int tbx = blockIdx.x * TILE_COLS; int tby = blockIdx.y * TILE_ROWS; diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index 0af6718085..c47ba95c69 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -474,6 +474,21 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ void fixture##_##name::PerfTestBody() +#define GPU_PERF_TEST_P(fixture, name, params) \ + class fixture##_##name : public fixture {\ + public:\ + fixture##_##name() {}\ + protected:\ + virtual void PerfTestBody();\ + };\ + TEST_P(fixture##_##name, name /*perf*/) \ + { \ + try { RunPerfTestBody(); } \ + catch (...) { cv::gpu::resetDevice(); throw; } \ + } \ + INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ + void fixture##_##name::PerfTestBody() + #define CV_PERF_TEST_MAIN(testsuitname, ...) \ int main(int argc, char **argv)\ From 389ecbe96d03ffe4c6d4b31efb147d5347ced24d Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Tue, 25 Dec 2012 21:57:10 +0400 Subject: [PATCH 2/3] warnings, renamed Marina's GPU_PERF_TEST_P --- modules/gpu/app/nv_perf_test/main.cpp | 4 ++-- modules/gpu/perf/perf_imgproc.cpp | 4 ++-- modules/gpu/perf/perf_softcascade.cpp | 12 ++++++------ modules/gpu/src/softcascade.cpp | 10 +++++----- samples/gpu/softcascade.cpp | 3 ++- 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/modules/gpu/app/nv_perf_test/main.cpp b/modules/gpu/app/nv_perf_test/main.cpp index 04b4f6f815..ff15581e80 100644 --- a/modules/gpu/app/nv_perf_test/main.cpp +++ b/modules/gpu/app/nv_perf_test/main.cpp @@ -81,8 +81,8 @@ GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jp std::string fileName = GetParam(); - const double rho = 1.0; - const double theta = 1.0; + const float rho = 1.f; + const float theta = 1.f; const int threshold = 40; const int minLineLenght = 
20; const int maxLineGap = 5; diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index ee0968442c..56cb257c2c 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -1805,8 +1805,8 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s std::string fileName = getDataPath(GetParam()); - const double rho = 1.0f; - const double theta = CV_PI / 180.0; + const float rho = 1.0f; + const float theta = static_cast(CV_PI / 180.0); const int threshold = 100; const int minLineLenght = 50; const int maxLineGap = 5; diff --git a/modules/gpu/perf/perf_softcascade.cpp b/modules/gpu/perf/perf_softcascade.cpp index e9437d70f9..32e41a432a 100644 --- a/modules/gpu/perf/perf_softcascade.cpp +++ b/modules/gpu/perf/perf_softcascade.cpp @@ -1,6 +1,6 @@ #include "perf_precomp.hpp" -#define GPU_PERF_TEST_P(fixture, name, params) \ +#define PERF_TEST_P1(fixture, name, params) \ class fixture##_##name : public fixture {\ public:\ fixture##_##name() {}\ @@ -52,7 +52,7 @@ namespace { typedef std::tr1::tuple fixture_t; typedef perf::TestBaseWithParam SCascadeTest; -GPU_PERF_TEST_P(SCascadeTest, detect, +PERF_TEST_P1(SCascadeTest, detect, testing::Combine( testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")))) @@ -108,7 +108,7 @@ static cv::Rect getFromTable(int idx) typedef std::tr1::tuple roi_fixture_t; typedef perf::TestBaseWithParam SCascadeTestRoi; -GPU_PERF_TEST_P(SCascadeTestRoi, detectInRoi, +PERF_TEST_P1(SCascadeTestRoi, detectInRoi, testing::Combine( testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")), @@ -152,7 +152,7 @@ RUN_GPU(SCascadeTestRoi, detectInRoi) NO_CPU(SCascadeTestRoi, detectInRoi) -GPU_PERF_TEST_P(SCascadeTestRoi, detectEachRoi, +PERF_TEST_P1(SCascadeTestRoi, detectEachRoi, 
testing::Combine( testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")), @@ -191,7 +191,7 @@ RUN_GPU(SCascadeTestRoi, detectEachRoi) NO_CPU(SCascadeTestRoi, detectEachRoi) -GPU_PERF_TEST_P(SCascadeTest, detectOnIntegral, +PERF_TEST_P1(SCascadeTest, detectOnIntegral, testing::Combine( testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/integrals.xml")))) @@ -239,7 +239,7 @@ RUN_GPU(SCascadeTest, detectOnIntegral) NO_CPU(SCascadeTest, detectOnIntegral) -GPU_PERF_TEST_P(SCascadeTest, detectStream, +PERF_TEST_P1(SCascadeTest, detectStream, testing::Combine( testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")))) diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp index 695fab58c9..511c4525d7 100644 --- a/modules/gpu/src/softcascade.cpp +++ b/modules/gpu/src/softcascade.cpp @@ -276,8 +276,8 @@ struct cv::gpu::SCascade::Fields int dcs = 0; for (int sc = 0; sc < totals; ++sc) { - int width = ::std::max(0.0f, fw - (origObjWidth * scale)); - int height = ::std::max(0.0f, fh - (origObjHeight * scale)); + int width = (int)::std::max(0.0f, fw - (origObjWidth * scale)); + int height = (int)::std::max(0.0f, fh - (origObjHeight * scale)); float logScale = ::log(scale); int fit = fitOctave(voctaves, logScale); @@ -457,7 +457,7 @@ cv::gpu::SCascade::~SCascade() { delete fields; } bool cv::gpu::SCascade::load(const FileNode& fn) { if (fields) delete fields; - fields = Fields::parseCascade(fn, minScale, maxScale, scales, flags); + fields = Fields::parseCascade(fn, (float)minScale, (float)maxScale, scales, flags); return fields != 0; } @@ -488,7 +488,7 @@ void cv::gpu::SCascade::detect(InputArray _image, InputArray _rois, OutputArray { flds.update(image.rows, image.cols, flds.shrinkage); - 
if (flds.check(minScale, maxScale, scales)) + if (flds.check((float)minScale, (float)maxScale, scales)) flds.createLevels(image.rows, image.cols); flds.preprocessor->apply(image, flds.shrunk); @@ -672,4 +672,4 @@ cv::Ptr cv::gpu::ChannelsProcessor::create(const int cv::gpu::ChannelsProcessor::ChannelsProcessor() { } cv::gpu::ChannelsProcessor::~ChannelsProcessor() { } -#endif \ No newline at end of file +#endif diff --git a/samples/gpu/softcascade.cpp b/samples/gpu/softcascade.cpp index 66f82d50bd..5f1adaf6cf 100644 --- a/samples/gpu/softcascade.cpp +++ b/samples/gpu/softcascade.cpp @@ -98,7 +98,8 @@ int main(int argc, char** argv) std::cout << "working..." << std::endl; cv::imshow("Soft Cascade demo", result); - cv::waitKey(10); + if (27 == cv::waitKey(10)) + break; } return 0; From b1aa7aecf073a4fff61bbc2b88a0f1aee085b69c Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Wed, 26 Dec 2012 21:47:06 +0400 Subject: [PATCH 3/3] fixed CC for CARMA --- cmake/OpenCVDetectCUDA.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index d6d5f3a98a..047da60610 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -34,7 +34,7 @@ if(CUDA_FOUND) message(STATUS "CUDA detected: " ${CUDA_VERSION}) if (CARMA) - set(CUDA_ARCH_BIN "3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") + set(CUDA_ARCH_BIN "2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_PTX "3.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") else() set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")