merge CUDA dev branch

pull/276/head
marina.kolpakova 12 years ago
commit 45191dd6a5
  1. 2
      cmake/OpenCVDetectCUDA.cmake
  2. 65
      modules/gpu/app/nv_perf_test/main.cpp
  3. 4
      modules/gpu/perf/perf_imgproc.cpp
  4. 12
      modules/gpu/perf/perf_softcascade.cpp
  5. 10
      modules/gpu/src/cuda/optflowbm.cu
  6. 10
      modules/gpu/src/softcascade.cpp
  7. 15
      modules/ts/include/opencv2/ts/ts_perf.hpp
  8. 3
      samples/gpu/softcascade.cpp

@ -34,7 +34,7 @@ if(CUDA_FOUND)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
if (CARMA)
set(CUDA_ARCH_BIN "3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_BIN "2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX "3.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
else()
set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0) 3.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")

@ -75,15 +75,14 @@ int main(int argc, char* argv[])
DEF_PARAM_TEST_1(Image, std::string);
PERF_TEST_P(Image, HoughLinesP,
testing::Values(std::string("im1_1280x800.jpg")))
GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg")))
{
declare.time(30.0);
std::string fileName = GetParam();
const double rho = 1.0;
const double theta = 1.0;
const float rho = 1.f;
const float theta = 1.f;
const int threshold = 40;
const int minLineLenght = 20;
const int maxLineGap = 5;
@ -125,8 +124,8 @@ PERF_TEST_P(Image, HoughLinesP,
DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth);
PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
testing::Combine(
GPU_PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
testing::Combine(
testing::Values(std::string("im1_1280x800.jpg")),
testing::Values(CV_8U, CV_16U)
))
@ -193,12 +192,12 @@ typedef std::pair<std::string, std::string> string_pair;
DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool);
PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(CV_8U, CV_16U),
testing::Bool()
))
GPU_PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(CV_8U, CV_16U),
testing::Bool()
))
{
declare.time(60);
@ -287,11 +286,11 @@ PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth);
PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(CV_8U, CV_16U)
))
GPU_PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(CV_8U, CV_16U)
))
{
declare.time(500);
@ -384,15 +383,15 @@ void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size);
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(cv::Size(16, 16)),
testing::Values(cv::Size(2, 2)),
testing::Values(cv::Size(16, 16))
))
GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(cv::Size(16, 16)),
testing::Values(cv::Size(2, 2)),
testing::Values(cv::Size(16, 16))
))
{
declare.time(1000);
declare.time(3000);
const string_pair fileNames = std::tr1::get<0>(GetParam());
const cv::Size block_size = std::tr1::get<1>(GetParam());
@ -435,15 +434,15 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
SANITY_CHECK(0);
}
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(cv::Size(16, 16)),
testing::Values(cv::Size(1, 1)),
testing::Values(cv::Size(16, 16))
))
GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
testing::Combine(
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
testing::Values(cv::Size(16, 16)),
testing::Values(cv::Size(1, 1)),
testing::Values(cv::Size(16, 16))
))
{
declare.time(1000);
declare.time(3000);
const string_pair fileNames = std::tr1::get<0>(GetParam());
const cv::Size block_size = std::tr1::get<1>(GetParam());

@ -1805,8 +1805,8 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP, testing::Values("cv/shared/pic5.png", "s
std::string fileName = getDataPath(GetParam());
const float rho = 1.f;
const float theta = float(CV_PI) / 180.f;
const float rho = 1.0f;
const float theta = static_cast<float>(CV_PI / 180.0);
const int threshold = 100;
const int minLineLenght = 50;
const int maxLineGap = 5;

@ -1,6 +1,6 @@
#include "perf_precomp.hpp"
#define GPU_PERF_TEST_P(fixture, name, params) \
#define PERF_TEST_P1(fixture, name, params) \
class fixture##_##name : public fixture {\
public:\
fixture##_##name() {}\
@ -52,7 +52,7 @@ namespace {
typedef std::tr1::tuple<std::string, std::string> fixture_t;
typedef perf::TestBaseWithParam<fixture_t> SCascadeTest;
GPU_PERF_TEST_P(SCascadeTest, detect,
PERF_TEST_P1(SCascadeTest, detect,
testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png"))))
@ -108,7 +108,7 @@ static cv::Rect getFromTable(int idx)
typedef std::tr1::tuple<std::string, std::string, int> roi_fixture_t;
typedef perf::TestBaseWithParam<roi_fixture_t> SCascadeTestRoi;
GPU_PERF_TEST_P(SCascadeTestRoi, detectInRoi,
PERF_TEST_P1(SCascadeTestRoi, detectInRoi,
testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
@ -152,7 +152,7 @@ RUN_GPU(SCascadeTestRoi, detectInRoi)
NO_CPU(SCascadeTestRoi, detectInRoi)
GPU_PERF_TEST_P(SCascadeTestRoi, detectEachRoi,
PERF_TEST_P1(SCascadeTestRoi, detectEachRoi,
testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png")),
@ -191,7 +191,7 @@ RUN_GPU(SCascadeTestRoi, detectEachRoi)
NO_CPU(SCascadeTestRoi, detectEachRoi)
GPU_PERF_TEST_P(SCascadeTest, detectOnIntegral,
PERF_TEST_P1(SCascadeTest, detectOnIntegral,
testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/integrals.xml"))))
@ -239,7 +239,7 @@ RUN_GPU(SCascadeTest, detectOnIntegral)
NO_CPU(SCascadeTest, detectOnIntegral)
GPU_PERF_TEST_P(SCascadeTest, detectStream,
PERF_TEST_P1(SCascadeTest, detectStream,
testing::Combine(
testing::Values(std::string("cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml")),
testing::Values(std::string("cv/cascadeandhog/bahnhof/image_00000000_0.png"))))

@ -210,7 +210,7 @@ namespace optflowbm_fast
{
}
__device__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
__device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
{
@ -246,7 +246,7 @@ namespace optflowbm_fast
}
}
__device__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
__device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE)
{
@ -271,7 +271,7 @@ namespace optflowbm_fast
}
}
__device__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
__device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const
{
int ay = i;
int ax = j + block_radius;
@ -298,7 +298,7 @@ namespace optflowbm_fast
}
}
__device__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const
__device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const
{
int bestDist = numeric_limits<int>::max();
int bestInd = -1;
@ -328,7 +328,7 @@ namespace optflowbm_fast
}
}
__device__ void operator()(PtrStepf velx, PtrStepf vely) const
__device__ __forceinline__ void operator()(PtrStepf velx, PtrStepf vely) const
{
int tbx = blockIdx.x * TILE_COLS;
int tby = blockIdx.y * TILE_ROWS;

@ -276,8 +276,8 @@ struct cv::gpu::SCascade::Fields
int dcs = 0;
for (int sc = 0; sc < totals; ++sc)
{
int width = ::std::max(0.0f, fw - (origObjWidth * scale));
int height = ::std::max(0.0f, fh - (origObjHeight * scale));
int width = (int)::std::max(0.0f, fw - (origObjWidth * scale));
int height = (int)::std::max(0.0f, fh - (origObjHeight * scale));
float logScale = ::log(scale);
int fit = fitOctave(voctaves, logScale);
@ -457,7 +457,7 @@ cv::gpu::SCascade::~SCascade() { delete fields; }
// Loads a cascade description from `fn`, replacing any previously loaded one.
// Returns true when Fields::parseCascade yielded a non-null Fields pointer.
bool cv::gpu::SCascade::load(const FileNode& fn)
{
// Release the currently held Fields before the pointer is overwritten below.
if (fields) delete fields;
// NOTE(review): this listing looks like a diff view, so the two assignments
// below are presumably the before/after forms of a single line; the second
// one only adds explicit (float) casts on minScale and maxScale -- confirm.
fields = Fields::parseCascade(fn, minScale, maxScale, scales, flags);
fields = Fields::parseCascade(fn, (float)minScale, (float)maxScale, scales, flags);
return fields != 0;
}
@ -488,7 +488,7 @@ void cv::gpu::SCascade::detect(InputArray _image, InputArray _rois, OutputArray
{
flds.update(image.rows, image.cols, flds.shrinkage);
if (flds.check(minScale, maxScale, scales))
if (flds.check((float)minScale, (float)maxScale, scales))
flds.createLevels(image.rows, image.cols);
flds.preprocessor->apply(image, flds.shrunk);
@ -672,4 +672,4 @@ cv::Ptr<cv::gpu::ChannelsProcessor> cv::gpu::ChannelsProcessor::create(const int
// Constructor and destructor of ChannelsProcessor; both bodies are empty.
cv::gpu::ChannelsProcessor::ChannelsProcessor() { }
cv::gpu::ChannelsProcessor::~ChannelsProcessor() { }
#endif
#endif

@ -474,6 +474,21 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os);
INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
void fixture##_##name::PerfTestBody()
// Declares a value-parameterized performance test intended for GPU code.
//
// Expands to a class `fixture##_##name` derived from `fixture` that declares
// a protected virtual PerfTestBody(), a TEST_P that calls RunPerfTestBody(),
// and an INSTANTIATE_TEST_CASE_P over `params`. The expansion ends with the
// header of `fixture##_##name::PerfTestBody()`, so the braces written right
// after the macro invocation become the body of the test.
//
// If RunPerfTestBody() throws anything, cv::gpu::resetDevice() is called and
// the exception is rethrown.
// NOTE(review): presumably the reset keeps a failed test from leaving the
// device in a bad state for the tests that follow -- confirm.
#define GPU_PERF_TEST_P(fixture, name, params) \
class fixture##_##name : public fixture {\
public:\
fixture##_##name() {}\
protected:\
virtual void PerfTestBody();\
};\
TEST_P(fixture##_##name, name /*perf*/) \
{ \
try { RunPerfTestBody(); } \
catch (...) { cv::gpu::resetDevice(); throw; } \
} \
INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\
void fixture##_##name::PerfTestBody()
#define CV_PERF_TEST_MAIN(testsuitname, ...) \
int main(int argc, char **argv)\

@ -98,7 +98,8 @@ int main(int argc, char** argv)
std::cout << "working..." << std::endl;
cv::imshow("Soft Cascade demo", result);
cv::waitKey(10);
if (27 == cv::waitKey(10))
break;
}
return 0;

Loading…
Cancel
Save