merge CUDA dev branch

pull/276/head
marina.kolpakova 12 years ago
commit 45191dd6a5
  1. 2
      cmake/OpenCVDetectCUDA.cmake
  2. 21
      modules/gpu/app/nv_perf_test/main.cpp
  3. 4
      modules/gpu/perf/perf_imgproc.cpp
  4. 12
      modules/gpu/perf/perf_softcascade.cpp
  5. 10
      modules/gpu/src/cuda/optflowbm.cu
  6. 8
      modules/gpu/src/softcascade.cpp
  7. 15
      modules/ts/include/opencv2/ts/ts_perf.hpp
  8. 3
      samples/gpu/softcascade.cpp

@ -34,7 +34,7 @@ if(CUDA_FOUND)
message(STATUS "CUDA detected: " ${CUDA_VERSION}) message(STATUS "CUDA detected: " ${CUDA_VERSION})
if (CARMA) if (CARMA)
set(CUDA_ARCH_BIN "3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_BIN "2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX "3.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") set(CUDA_ARCH_PTX "3.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
else() else()
set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")

@ -75,15 +75,14 @@ int main(int argc, char* argv[])
DEF_PARAM_TEST_1(Image, std::string); DEF_PARAM_TEST_1(Image, std::string);
PERF_TEST_P(Image, HoughLinesP, GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg")))
testing::Values(std::string("im1_1280x800.jpg")))
{ {
declare.time(30.0); declare.time(30.0);
std::string fileName = GetParam(); std::string fileName = GetParam();
const double rho = 1.0; const float rho = 1.f;
const double theta = 1.0; const float theta = 1.f;
const int threshold = 40; const int threshold = 40;
const int minLineLenght = 20; const int minLineLenght = 20;
const int maxLineGap = 5; const int maxLineGap = 5;
@ -125,7 +124,7 @@ PERF_TEST_P(Image, HoughLinesP,
DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth); DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth);
PERF_TEST_P(Image_Depth, GoodFeaturesToTrack, GPU_PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
testing::Combine( testing::Combine(
testing::Values(std::string("im1_1280x800.jpg")), testing::Values(std::string("im1_1280x800.jpg")),
testing::Values(CV_8U, CV_16U) testing::Values(CV_8U, CV_16U)
@ -193,7 +192,7 @@ typedef std::pair<std::string, std::string> string_pair;
DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool); DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool);
PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, GPU_PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
testing::Combine( testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(CV_8U, CV_16U), testing::Values(CV_8U, CV_16U),
@ -287,7 +286,7 @@ PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth); DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth);
PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback, GPU_PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
testing::Combine( testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(CV_8U, CV_16U) testing::Values(CV_8U, CV_16U)
@ -384,7 +383,7 @@ void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size); DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size);
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
testing::Combine( testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(cv::Size(16, 16)), testing::Values(cv::Size(16, 16)),
@ -392,7 +391,7 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
testing::Values(cv::Size(16, 16)) testing::Values(cv::Size(16, 16))
)) ))
{ {
declare.time(1000); declare.time(3000);
const string_pair fileNames = std::tr1::get<0>(GetParam()); const string_pair fileNames = std::tr1::get<0>(GetParam());
const cv::Size block_size = std::tr1::get<1>(GetParam()); const cv::Size block_size = std::tr1::get<1>(GetParam());
@ -435,7 +434,7 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
SANITY_CHECK(0); SANITY_CHECK(0);
} }
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
testing::Combine( testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(cv::Size(16, 16)), testing::Values(cv::Size(16, 16)),
@ -443,7 +442,7 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
testing::Values(cv::Size(16, 16)) testing::Values(cv::Size(16, 16))
)) ))
{ {
declare.time(1000); declare.time(3000);
const string_pair fileNames = std::tr1::get<0>(GetParam()); const string_pair fileNames = std::tr1::get<0>(GetParam());
const cv::Size block_size = std::tr1::get<1>(GetParam()); const cv::Size block_size = std::tr1::get<1>(GetParam());

@ -1805,8 +1805,8 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s
std::string fileName = getDataPath(GetParam()); std::string fileName = getDataPath(GetParam());
const float rho = 1.f; const float rho = 1.0f;
const float theta = float(CV_PI) / 180.f; const float theta = static_cast<float>(CV_PI / 180.0);
const int threshold = 100; const int threshold = 100;
const int minLineLenght = 50; const int minLineLenght = 50;
const int maxLineGap = 5; const int maxLineGap = 5;

@ -1,6 +1,6 @@
#include "perf_precomp.hpp" #include "perf_precomp.hpp"
#define GPU_PERF_TEST_P(fixture, name, params) \ #define PERF_TEST_P1(fixture, name, params) \
class fixture##_##name : public fixture {\ class fixture##_##name : public fixture {\
public:\ public:\
fixture##_##name() {}\ fixture##_##name() {}\
@ -52,7 +52,7 @@ namespace {
typedef std::tr1::tuple<std::string, std::string> fixture_t; typedef std::tr1::tuple<std::string, std::string> fixture_t;
typedef perf::TestBaseWithParam<fixture_t> SCascadeTest; typedef perf::TestBaseWithParam<fixture_t> SCascadeTest;
GPU_PERF_TEST_P(SCascadeTest, detect, PERF_TEST_P1(SCascadeTest, detect,
testing::Combine( testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")))) testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png"))))
@ -108,7 +108,7 @@ static cv::Rect getFromTable(int idx)
typedef std::tr1::tuple<std::string, std::string, int> roi_fixture_t; typedef std::tr1::tuple<std::string, std::string, int> roi_fixture_t;
typedef perf::TestBaseWithParam<roi_fixture_t> SCascadeTestRoi; typedef perf::TestBaseWithParam<roi_fixture_t> SCascadeTestRoi;
GPU_PERF_TEST_P(SCascadeTestRoi, detectInRoi, PERF_TEST_P1(SCascadeTestRoi, detectInRoi,
testing::Combine( testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")), testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
@ -152,7 +152,7 @@ RUN_GPU(SCascadeTestRoi, detectInRoi)
NO_CPU(SCascadeTestRoi, detectInRoi) NO_CPU(SCascadeTestRoi, detectInRoi)
GPU_PERF_TEST_P(SCascadeTestRoi, detectEachRoi, PERF_TEST_P1(SCascadeTestRoi, detectEachRoi,
testing::Combine( testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")), testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
@ -191,7 +191,7 @@ RUN_GPU(SCascadeTestRoi, detectEachRoi)
NO_CPU(SCascadeTestRoi, detectEachRoi) NO_CPU(SCascadeTestRoi, detectEachRoi)
GPU_PERF_TEST_P(SCascadeTest, detectOnIntegral, PERF_TEST_P1(SCascadeTest, detectOnIntegral,
testing::Combine( testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/integrals.xml")))) testing::Values(std::string("cv/cascadeandhog/integrals.xml"))))
@ -239,7 +239,7 @@ RUN_GPU(SCascadeTest, detectOnIntegral)
NO_CPU(SCascadeTest, detectOnIntegral) NO_CPU(SCascadeTest, detectOnIntegral)
GPU_PERF_TEST_P(SCascadeTest, detectStream, PERF_TEST_P1(SCascadeTest, detectStream,
testing::Combine( testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")), testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")))) testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png"))))

@ -210,7 +210,7 @@ namespace optflowbm_fast
{ {
} }
__device__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{ {
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
{ {
@ -246,7 +246,7 @@ namespace optflowbm_fast
} }
} }
__device__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{ {
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
{ {
@ -271,7 +271,7 @@ namespace optflowbm_fast
} }
} }
__device__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const __device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{ {
int ay = i; int ay = i;
int ax = j + block_radius; int ax = j + block_radius;
@ -298,7 +298,7 @@ namespace optflowbm_fast
} }
} }
__device__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const __device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const
{ {
int bestDist = numeric_limits<int>::max(); int bestDist = numeric_limits<int>::max();
int bestInd = -1; int bestInd = -1;
@ -328,7 +328,7 @@ namespace optflowbm_fast
} }
} }
__device__ void operator()(PtrStepf velx, PtrStepf vely) const __device__ __forceinline__ void operator()(PtrStepf velx, PtrStepf vely) const
{ {
int tbx = blockIdx.x * TILE_COLS; int tbx = blockIdx.x * TILE_COLS;
int tby = blockIdx.y * TILE_ROWS; int tby = blockIdx.y * TILE_ROWS;

@ -276,8 +276,8 @@ struct cv::gpu::SCascade::Fields
int dcs = 0; int dcs = 0;
for (int sc = 0; sc < totals; ++sc) for (int sc = 0; sc < totals; ++sc)
{ {
int width = ::std::max(0.0f, fw - (origObjWidth * scale)); int width = (int)::std::max(0.0f, fw - (origObjWidth * scale));
int height = ::std::max(0.0f, fh - (origObjHeight * scale)); int height = (int)::std::max(0.0f, fh - (origObjHeight * scale));
float logScale = ::log(scale); float logScale = ::log(scale);
int fit = fitOctave(voctaves, logScale); int fit = fitOctave(voctaves, logScale);
@ -457,7 +457,7 @@ cv::gpu::SCascade::~SCascade() { delete fields; }
bool cv::gpu::SCascade::load(const FileNode& fn) bool cv::gpu::SCascade::load(const FileNode& fn)
{ {
if (fields) delete fields; if (fields) delete fields;
fields = Fields::parseCascade(fn, minScale, maxScale, scales, flags); fields = Fields::parseCascade(fn, (float)minScale, (float)maxScale, scales, flags);
return fields != 0; return fields != 0;
} }
@ -488,7 +488,7 @@ void cv::gpu::SCascade::detect(InputArray _image, InputArray _rois, OutputArray
{ {
flds.update(image.rows, image.cols, flds.shrinkage); flds.update(image.rows, image.cols, flds.shrinkage);
if (flds.check(minScale, maxScale, scales)) if (flds.check((float)minScale, (float)maxScale, scales))
flds.createLevels(image.rows, image.cols); flds.createLevels(image.rows, image.cols);
flds.preprocessor->apply(image, flds.shrunk); flds.preprocessor->apply(image, flds.shrunk);

@ -474,6 +474,21 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
void fixture##_##name::PerfTestBody() void fixture##_##name::PerfTestBody()
// GPU_PERF_TEST_P(fixture, name, params)
//
// Declares and registers a value-parameterized performance test whose body
// is the compound statement written immediately after the macro invocation.
//
//   fixture - base class the generated test class `fixture##_##name` derives from
//   name    - test name; also used as the suffix of the generated class name
//   params  - parameter generator forwarded to INSTANTIATE_TEST_CASE_P
//
// The generated TEST_P body calls RunPerfTestBody() inside a try block. If
// anything is thrown, cv::gpu::resetDevice() is called and the exception is
// re-thrown unchanged, so the failure is still reported to the caller.
//
// The expansion deliberately ends with the signature of
// `fixture##_##name::PerfTestBody()` and no body: the `{ ... }` block that
// follows the macro use in the test source becomes that function's definition.
#define GPU_PERF_TEST_P(fixture, name, params) \
class fixture##_##name : public fixture {\
public:\
fixture##_##name() {}\
protected:\
virtual void PerfTestBody();\
};\
TEST_P(fixture##_##name, name /*perf*/) \
{ \
try { RunPerfTestBody(); } \
catch (...) { cv::gpu::resetDevice(); throw; } \
} \
INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
void fixture##_##name::PerfTestBody()
#define CV_PERF_TEST_MAIN(testsuitname, ...) \ #define CV_PERF_TEST_MAIN(testsuitname, ...) \
int main(int argc, char **argv)\ int main(int argc, char **argv)\

@ -98,7 +98,8 @@ int main(int argc, char** argv)
std::cout << "working..." << std::endl; std::cout << "working..." << std::endl;
cv::imshow("Soft Cascade demo", result); cv::imshow("Soft Cascade demo", result);
cv::waitKey(10); if (27 == cv::waitKey(10))
break;
} }
return 0; return 0;

Loading…
Cancel
Save