diff --git a/modules/gpu/app/nv_perf_test/main.cpp b/modules/gpu/app/nv_perf_test/main.cpp index 928b30a19e..04b4f6f815 100644 --- a/modules/gpu/app/nv_perf_test/main.cpp +++ b/modules/gpu/app/nv_perf_test/main.cpp @@ -75,8 +75,7 @@ int main(int argc, char* argv[]) DEF_PARAM_TEST_1(Image, std::string); -PERF_TEST_P(Image, HoughLinesP, - testing::Values(std::string("im1_1280x800.jpg"))) +GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg"))) { declare.time(30.0); @@ -125,8 +124,8 @@ PERF_TEST_P(Image, HoughLinesP, DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth); -PERF_TEST_P(Image_Depth, GoodFeaturesToTrack, - testing::Combine( +GPU_PERF_TEST_P(Image_Depth, GoodFeaturesToTrack, + testing::Combine( testing::Values(std::string("im1_1280x800.jpg")), testing::Values(CV_8U, CV_16U) )) @@ -193,12 +192,12 @@ typedef std::pair string_pair; DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool); -PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(CV_8U, CV_16U), - testing::Bool() - )) +GPU_PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(CV_8U, CV_16U), + testing::Bool() + )) { declare.time(60); @@ -287,11 +286,11 @@ PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth); -PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(CV_8U, CV_16U) - )) +GPU_PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(CV_8U, CV_16U) + )) { declare.time(500); @@ -384,15 +383,15 @@ void 
calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr, DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size); -PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(cv::Size(16, 16)), - testing::Values(cv::Size(2, 2)), - testing::Values(cv::Size(16, 16)) - )) +GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(cv::Size(16, 16)), + testing::Values(cv::Size(2, 2)), + testing::Values(cv::Size(16, 16)) + )) { - declare.time(1000); + declare.time(3000); const string_pair fileNames = std::tr1::get<0>(GetParam()); const cv::Size block_size = std::tr1::get<1>(GetParam()); @@ -435,15 +434,15 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, SANITY_CHECK(0); } -PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, - testing::Combine( - testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), - testing::Values(cv::Size(16, 16)), - testing::Values(cv::Size(1, 1)), - testing::Values(cv::Size(16, 16)) - )) +GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(cv::Size(16, 16)), + testing::Values(cv::Size(1, 1)), + testing::Values(cv::Size(16, 16)) + )) { - declare.time(1000); + declare.time(3000); const string_pair fileNames = std::tr1::get<0>(GetParam()); const cv::Size block_size = std::tr1::get<1>(GetParam()); diff --git a/modules/gpu/src/cuda/optflowbm.cu b/modules/gpu/src/cuda/optflowbm.cu index f9090abdc0..baf8dfb362 100644 --- a/modules/gpu/src/cuda/optflowbm.cu +++ b/modules/gpu/src/cuda/optflowbm.cu @@ -210,7 +210,7 @@ namespace optflowbm_fast { } - __device__ void initSums_BruteForce(int i, int 
j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const { for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) { @@ -246,7 +246,7 @@ namespace optflowbm_fast } } - __device__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const { for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) { @@ -271,7 +271,7 @@ namespace optflowbm_fast } } - __device__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + __device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const { int ay = i; int ax = j + block_radius; @@ -298,7 +298,7 @@ namespace optflowbm_fast } } - __device__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const + __device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const { int bestDist = numeric_limits::max(); int bestInd = -1; @@ -328,7 +328,7 @@ namespace optflowbm_fast } } - __device__ void operator()(PtrStepf velx, PtrStepf vely) const + __device__ __forceinline__ void operator()(PtrStepf velx, PtrStepf vely) const { int tbx = blockIdx.x * TILE_COLS; int tby = blockIdx.y * TILE_ROWS; diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index 0af6718085..c47ba95c69 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -474,6 +474,21 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); INSTANTIATE_TEST_CASE_P(/*none*/, 
fixture##_##name, params);\ void fixture##_##name::PerfTestBody() +#define GPU_PERF_TEST_P(fixture, name, params) /* declares class fixture##_##name derived from fixture, registers it through TEST_P and INSTANTIATE_TEST_CASE_P with params, and ends on the PerfTestBody() signature so the block written after the macro invocation becomes that method's body */ \ + class fixture##_##name : public fixture {\ + public:\ + fixture##_##name() {}\ + protected:\ + virtual void PerfTestBody();\ + };\ + TEST_P(fixture##_##name, name /*perf*/) \ + { \ + try { RunPerfTestBody(); } \ + catch (...) { cv::gpu::resetDevice(); throw; } /* any exception escaping RunPerfTestBody(): call cv::gpu::resetDevice(), then rethrow it unchanged. NOTE(review): cv::gpu::resetDevice must be declared wherever this macro is expanded; no gpu include is visible in this hunk - confirm */ \ + } \ + INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ + void fixture##_##name::PerfTestBody() + #define CV_PERF_TEST_MAIN(testsuitname, ...) \ int main(int argc, char **argv)\