Merge pull request #485 from jet47:gpu-new-functionality
commit
37c6357b97
38 changed files with 5603 additions and 298 deletions
@ -0,0 +1,28 @@ |
||||
set(PERF4AU_REQUIRED_DEPS opencv_core opencv_imgproc opencv_highgui opencv_video opencv_legacy opencv_gpu opencv_ts) |
||||
|
||||
ocv_check_dependencies(${PERF4AU_REQUIRED_DEPS}) |
||||
|
||||
set(the_target gpu_perf4au) |
||||
project(${the_target}) |
||||
|
||||
ocv_include_modules(${PERF4AU_REQUIRED_DEPS}) |
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS) |
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function") |
||||
endif() |
||||
|
||||
file(GLOB srcs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp *.h *.hpp) |
||||
add_executable(${the_target} ${srcs}) |
||||
|
||||
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${PERF4AU_REQUIRED_DEPS}) |
||||
|
||||
if(ENABLE_SOLUTION_FOLDERS) |
||||
set_target_properties(${the_target} PROPERTIES FOLDER "tests performance") |
||||
endif() |
||||
|
||||
if(WIN32) |
||||
if(MSVC AND NOT BUILD_SHARED_LIBS) |
||||
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") |
||||
endif() |
||||
endif() |
||||
|
After Width: | Height: | Size: 140 KiB |
After Width: | Height: | Size: 140 KiB |
@ -0,0 +1,490 @@ |
||||
#include <cstdio> |
||||
#ifdef HAVE_CVCONFIG_H |
||||
#include "cvconfig.h" |
||||
#endif |
||||
#include "opencv2/core/core.hpp" |
||||
#include "opencv2/gpu/gpu.hpp" |
||||
#include "opencv2/highgui/highgui.hpp" |
||||
#include "opencv2/video/video.hpp" |
||||
#include "opencv2/legacy/legacy.hpp" |
||||
#include "opencv2/ts/ts.hpp" |
||||
#include "opencv2/ts/ts_perf.hpp" |
||||
|
||||
static void printOsInfo() |
||||
{ |
||||
#if defined _WIN32 |
||||
# if defined _WIN64 |
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"); fflush(stdout); |
||||
# else |
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"); fflush(stdout); |
||||
# endif |
||||
#elif defined linux |
||||
# if defined _LP64 |
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"); fflush(stdout); |
||||
# else |
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"); fflush(stdout); |
||||
# endif |
||||
#elif defined __APPLE__ |
||||
# if defined _LP64 |
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"); fflush(stdout); |
||||
# else |
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"); fflush(stdout); |
||||
# endif |
||||
#endif |
||||
} |
||||
|
||||
static void printCudaInfo() |
||||
{ |
||||
const int deviceCount = cv::gpu::getCudaEnabledDeviceCount(); |
||||
|
||||
printf("[----------]\n"); fflush(stdout); |
||||
printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount); fflush(stdout); |
||||
printf("[----------]\n"); fflush(stdout); |
||||
|
||||
for (int i = 0; i < deviceCount; ++i) |
||||
{ |
||||
cv::gpu::DeviceInfo info(i); |
||||
|
||||
printf("[----------]\n"); fflush(stdout); |
||||
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()); fflush(stdout); |
||||
printf("[ ] \tCompute capability: %d.%d\n", info.majorVersion(), info.minorVersion()); fflush(stdout); |
||||
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()); fflush(stdout); |
||||
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)); fflush(stdout); |
||||
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)); fflush(stdout); |
||||
if (!info.isCompatible()) |
||||
printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n"); |
||||
printf("[----------]\n"); fflush(stdout); |
||||
} |
||||
} |
||||
|
||||
int main(int argc, char* argv[]) |
||||
{ |
||||
printOsInfo(); |
||||
printCudaInfo(); |
||||
|
||||
perf::Regression::Init("nv_perf_test"); |
||||
perf::TestBase::Init(argc, argv); |
||||
testing::InitGoogleTest(&argc, argv); |
||||
|
||||
return RUN_ALL_TESTS(); |
||||
} |
||||
|
||||
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name |
||||
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name |
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// HoughLinesP
|
||||
|
||||
DEF_PARAM_TEST_1(Image, std::string); |
||||
|
||||
PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg"))) |
||||
{ |
||||
declare.time(30.0); |
||||
|
||||
std::string fileName = GetParam(); |
||||
|
||||
const float rho = 1.f; |
||||
const float theta = 1.f; |
||||
const int threshold = 40; |
||||
const int minLineLenght = 20; |
||||
const int maxLineGap = 5; |
||||
|
||||
cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE); |
||||
|
||||
if (PERF_RUN_GPU()) |
||||
{ |
||||
cv::gpu::GpuMat d_image(image); |
||||
cv::gpu::GpuMat d_lines; |
||||
cv::gpu::HoughLinesBuf d_buf; |
||||
|
||||
cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); |
||||
|
||||
TEST_CYCLE() |
||||
{ |
||||
cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
cv::Mat mask; |
||||
cv::Canny(image, mask, 50, 100); |
||||
|
||||
std::vector<cv::Vec4i> lines; |
||||
cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap); |
||||
|
||||
TEST_CYCLE() |
||||
{ |
||||
cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap); |
||||
} |
||||
} |
||||
|
||||
SANITY_CHECK(0); |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// GoodFeaturesToTrack
|
||||
|
||||
DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth); |
||||
|
||||
PERF_TEST_P(Image_Depth, GoodFeaturesToTrack, |
||||
testing::Combine( |
||||
testing::Values(std::string("im1_1280x800.jpg")), |
||||
testing::Values(CV_8U, CV_16U) |
||||
)) |
||||
{ |
||||
declare.time(60); |
||||
|
||||
const std::string fileName = std::tr1::get<0>(GetParam()); |
||||
const int depth = std::tr1::get<1>(GetParam()); |
||||
|
||||
const int maxCorners = 5000; |
||||
const double qualityLevel = 0.05; |
||||
const int minDistance = 5; |
||||
const int blockSize = 3; |
||||
const bool useHarrisDetector = true; |
||||
const double k = 0.05; |
||||
|
||||
cv::Mat src = cv::imread(fileName, cv::IMREAD_GRAYSCALE); |
||||
if (src.empty()) |
||||
FAIL() << "Unable to load source image [" << fileName << "]"; |
||||
|
||||
if (depth != CV_8U) |
||||
src.convertTo(src, depth); |
||||
|
||||
cv::Mat mask(src.size(), CV_8UC1, cv::Scalar::all(1)); |
||||
mask(cv::Rect(0, 0, 100, 100)).setTo(cv::Scalar::all(0)); |
||||
|
||||
if (PERF_RUN_GPU()) |
||||
{ |
||||
cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k); |
||||
|
||||
cv::gpu::GpuMat d_src(src); |
||||
cv::gpu::GpuMat d_mask(mask); |
||||
cv::gpu::GpuMat d_pts; |
||||
|
||||
d_detector(d_src, d_pts, d_mask); |
||||
|
||||
TEST_CYCLE() |
||||
{ |
||||
d_detector(d_src, d_pts, d_mask); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
if (depth != CV_8U) |
||||
FAIL() << "Unsupported depth"; |
||||
|
||||
cv::Mat pts; |
||||
|
||||
cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k); |
||||
|
||||
TEST_CYCLE() |
||||
{ |
||||
cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k); |
||||
} |
||||
} |
||||
|
||||
SANITY_CHECK(0); |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// OpticalFlowPyrLKSparse
|
||||
|
||||
typedef std::pair<std::string, std::string> string_pair; |
||||
|
||||
DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool); |
||||
|
||||
PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse, |
||||
testing::Combine( |
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), |
||||
testing::Values(CV_8U, CV_16U), |
||||
testing::Bool() |
||||
)) |
||||
{ |
||||
declare.time(60); |
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam()); |
||||
const int depth = std::tr1::get<1>(GetParam()); |
||||
const bool graySource = std::tr1::get<2>(GetParam()); |
||||
|
||||
// PyrLK params
|
||||
const cv::Size winSize(15, 15); |
||||
const int maxLevel = 5; |
||||
const cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 0.01); |
||||
|
||||
// GoodFeaturesToTrack params
|
||||
const int maxCorners = 5000; |
||||
const double qualityLevel = 0.05; |
||||
const int minDistance = 5; |
||||
const int blockSize = 3; |
||||
const bool useHarrisDetector = true; |
||||
const double k = 0.05; |
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); |
||||
if (src1.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]"; |
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR); |
||||
if (src2.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]"; |
||||
|
||||
cv::Mat gray_src; |
||||
if (graySource) |
||||
gray_src = src1; |
||||
else |
||||
cv::cvtColor(src1, gray_src, cv::COLOR_BGR2GRAY); |
||||
|
||||
cv::Mat pts; |
||||
cv::goodFeaturesToTrack(gray_src, pts, maxCorners, qualityLevel, minDistance, cv::noArray(), blockSize, useHarrisDetector, k); |
||||
|
||||
if (depth != CV_8U) |
||||
{ |
||||
src1.convertTo(src1, depth); |
||||
src2.convertTo(src2, depth); |
||||
} |
||||
|
||||
if (PERF_RUN_GPU()) |
||||
{ |
||||
cv::gpu::GpuMat d_src1(src1); |
||||
cv::gpu::GpuMat d_src2(src2); |
||||
cv::gpu::GpuMat d_pts(pts.reshape(2, 1)); |
||||
cv::gpu::GpuMat d_nextPts; |
||||
cv::gpu::GpuMat d_status; |
||||
|
||||
cv::gpu::PyrLKOpticalFlow d_pyrLK; |
||||
d_pyrLK.winSize = winSize; |
||||
d_pyrLK.maxLevel = maxLevel; |
||||
d_pyrLK.iters = criteria.maxCount; |
||||
d_pyrLK.useInitialFlow = false; |
||||
|
||||
d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status); |
||||
|
||||
TEST_CYCLE() |
||||
{ |
||||
d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
if (depth != CV_8U) |
||||
FAIL() << "Unsupported depth"; |
||||
|
||||
cv::Mat nextPts; |
||||
cv::Mat status; |
||||
|
||||
cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria); |
||||
|
||||
TEST_CYCLE() |
||||
{ |
||||
cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria); |
||||
} |
||||
} |
||||
|
||||
SANITY_CHECK(0); |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// OpticalFlowFarneback
|
||||
|
||||
DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth); |
||||
|
||||
PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback, |
||||
testing::Combine( |
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), |
||||
testing::Values(CV_8U, CV_16U) |
||||
)) |
||||
{ |
||||
declare.time(500); |
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam()); |
||||
const int depth = std::tr1::get<1>(GetParam()); |
||||
|
||||
const double pyrScale = 0.5; |
||||
const int numLevels = 6; |
||||
const int winSize = 7; |
||||
const int numIters = 15; |
||||
const int polyN = 7; |
||||
const double polySigma = 1.5; |
||||
const int flags = cv::OPTFLOW_USE_INITIAL_FLOW; |
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE); |
||||
if (src1.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]"; |
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE); |
||||
if (src2.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]"; |
||||
|
||||
if (depth != CV_8U) |
||||
{ |
||||
src1.convertTo(src1, depth); |
||||
src2.convertTo(src2, depth); |
||||
} |
||||
|
||||
if (PERF_RUN_GPU()) |
||||
{ |
||||
cv::gpu::GpuMat d_src1(src1); |
||||
cv::gpu::GpuMat d_src2(src2); |
||||
cv::gpu::GpuMat d_u(src1.size(), CV_32FC1, cv::Scalar::all(0)); |
||||
cv::gpu::GpuMat d_v(src1.size(), CV_32FC1, cv::Scalar::all(0)); |
||||
|
||||
cv::gpu::FarnebackOpticalFlow d_farneback; |
||||
d_farneback.pyrScale = pyrScale; |
||||
d_farneback.numLevels = numLevels; |
||||
d_farneback.winSize = winSize; |
||||
d_farneback.numIters = numIters; |
||||
d_farneback.polyN = polyN; |
||||
d_farneback.polySigma = polySigma; |
||||
d_farneback.flags = flags; |
||||
|
||||
d_farneback(d_src1, d_src2, d_u, d_v); |
||||
|
||||
TEST_CYCLE_N(10) |
||||
{ |
||||
d_farneback(d_src1, d_src2, d_u, d_v); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
if (depth != CV_8U) |
||||
FAIL() << "Unsupported depth"; |
||||
|
||||
cv::Mat flow(src1.size(), CV_32FC2, cv::Scalar::all(0)); |
||||
|
||||
cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags); |
||||
|
||||
TEST_CYCLE_N(10) |
||||
{ |
||||
cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags); |
||||
} |
||||
} |
||||
|
||||
SANITY_CHECK(0); |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// OpticalFlowBM
|
||||
|
||||
void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr, |
||||
cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious, |
||||
cv::Mat& velx, cv::Mat& vely) |
||||
{ |
||||
cv::Size sz((curr.cols - bSize.width + shiftSize.width)/shiftSize.width, (curr.rows - bSize.height + shiftSize.height)/shiftSize.height); |
||||
|
||||
velx.create(sz, CV_32FC1); |
||||
vely.create(sz, CV_32FC1); |
||||
|
||||
CvMat cvprev = prev; |
||||
CvMat cvcurr = curr; |
||||
|
||||
CvMat cvvelx = velx; |
||||
CvMat cvvely = vely; |
||||
|
||||
cvCalcOpticalFlowBM(&cvprev, &cvcurr, bSize, shiftSize, maxRange, usePrevious, &cvvelx, &cvvely); |
||||
} |
||||
|
||||
DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size); |
||||
|
||||
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, |
||||
testing::Combine( |
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), |
||||
testing::Values(cv::Size(16, 16)), |
||||
testing::Values(cv::Size(2, 2)), |
||||
testing::Values(cv::Size(16, 16)) |
||||
)) |
||||
{ |
||||
declare.time(3000); |
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam()); |
||||
const cv::Size block_size = std::tr1::get<1>(GetParam()); |
||||
const cv::Size shift_size = std::tr1::get<2>(GetParam()); |
||||
const cv::Size max_range = std::tr1::get<3>(GetParam()); |
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE); |
||||
if (src1.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]"; |
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE); |
||||
if (src2.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]"; |
||||
|
||||
if (PERF_RUN_GPU()) |
||||
{ |
||||
cv::gpu::GpuMat d_src1(src1); |
||||
cv::gpu::GpuMat d_src2(src2); |
||||
cv::gpu::GpuMat d_velx, d_vely, buf; |
||||
|
||||
cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf); |
||||
|
||||
TEST_CYCLE_N(10) |
||||
{ |
||||
cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
cv::Mat velx, vely; |
||||
|
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); |
||||
|
||||
TEST_CYCLE_N(10) |
||||
{ |
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); |
||||
} |
||||
} |
||||
|
||||
SANITY_CHECK(0); |
||||
} |
||||
|
||||
PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, |
||||
testing::Combine( |
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), |
||||
testing::Values(cv::Size(16, 16)), |
||||
testing::Values(cv::Size(1, 1)), |
||||
testing::Values(cv::Size(16, 16)) |
||||
)) |
||||
{ |
||||
declare.time(3000); |
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam()); |
||||
const cv::Size block_size = std::tr1::get<1>(GetParam()); |
||||
const cv::Size shift_size = std::tr1::get<2>(GetParam()); |
||||
const cv::Size max_range = std::tr1::get<3>(GetParam()); |
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE); |
||||
if (src1.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]"; |
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE); |
||||
if (src2.empty()) |
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]"; |
||||
|
||||
if (PERF_RUN_GPU()) |
||||
{ |
||||
cv::gpu::GpuMat d_src1(src1); |
||||
cv::gpu::GpuMat d_src2(src2); |
||||
cv::gpu::GpuMat d_velx, d_vely; |
||||
|
||||
cv::gpu::FastOpticalFlowBM fastBM; |
||||
|
||||
fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width); |
||||
|
||||
TEST_CYCLE_N(10) |
||||
{ |
||||
fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
cv::Mat velx, vely; |
||||
|
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); |
||||
|
||||
TEST_CYCLE_N(10) |
||||
{ |
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); |
||||
} |
||||
} |
||||
|
||||
SANITY_CHECK(0); |
||||
} |
@ -0,0 +1,414 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or bpied warranties, including, but not limited to, the bpied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/gpu/device/common.hpp" |
||||
#include "opencv2/gpu/device/limits.hpp" |
||||
#include "opencv2/gpu/device/functional.hpp" |
||||
#include "opencv2/gpu/device/reduce.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::device; |
||||
|
||||
namespace optflowbm |
||||
{ |
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp); |
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp); |
||||
|
||||
__device__ int cmpBlocks(int X1, int Y1, int X2, int Y2, int2 blockSize) |
||||
{ |
||||
int s = 0; |
||||
|
||||
for (int y = 0; y < blockSize.y; ++y) |
||||
{ |
||||
for (int x = 0; x < blockSize.x; ++x) |
||||
s += ::abs(tex2D(tex_prev, X1 + x, Y1 + y) - tex2D(tex_curr, X2 + x, Y2 + y)); |
||||
} |
||||
|
||||
return s; |
||||
} |
||||
|
||||
__global__ void calcOptFlowBM(PtrStepSzf velx, PtrStepf vely, const int2 blockSize, const int2 shiftSize, const bool usePrevious, |
||||
const int maxX, const int maxY, const int acceptLevel, const int escapeLevel, |
||||
const short2* ss, const int ssCount) |
||||
{ |
||||
const int j = blockIdx.x * blockDim.x + threadIdx.x; |
||||
const int i = blockIdx.y * blockDim.y + threadIdx.y; |
||||
|
||||
if (i >= velx.rows || j >= velx.cols) |
||||
return; |
||||
|
||||
const int X1 = j * shiftSize.x; |
||||
const int Y1 = i * shiftSize.y; |
||||
|
||||
const int offX = usePrevious ? __float2int_rn(velx(i, j)) : 0; |
||||
const int offY = usePrevious ? __float2int_rn(vely(i, j)) : 0; |
||||
|
||||
int X2 = X1 + offX; |
||||
int Y2 = Y1 + offY; |
||||
|
||||
int dist = numeric_limits<int>::max(); |
||||
|
||||
if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY) |
||||
dist = cmpBlocks(X1, Y1, X2, Y2, blockSize); |
||||
|
||||
int countMin = 1; |
||||
int sumx = offX; |
||||
int sumy = offY; |
||||
|
||||
if (dist > acceptLevel) |
||||
{ |
||||
// do brute-force search |
||||
for (int k = 0; k < ssCount; ++k) |
||||
{ |
||||
const short2 ssVal = ss[k]; |
||||
|
||||
const int dx = offX + ssVal.x; |
||||
const int dy = offY + ssVal.y; |
||||
|
||||
X2 = X1 + dx; |
||||
Y2 = Y1 + dy; |
||||
|
||||
if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY) |
||||
{ |
||||
const int tmpDist = cmpBlocks(X1, Y1, X2, Y2, blockSize); |
||||
if (tmpDist < acceptLevel) |
||||
{ |
||||
sumx = dx; |
||||
sumy = dy; |
||||
countMin = 1; |
||||
break; |
||||
} |
||||
|
||||
if (tmpDist < dist) |
||||
{ |
||||
dist = tmpDist; |
||||
sumx = dx; |
||||
sumy = dy; |
||||
countMin = 1; |
||||
} |
||||
else if (tmpDist == dist) |
||||
{ |
||||
sumx += dx; |
||||
sumy += dy; |
||||
countMin++; |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (dist > escapeLevel) |
||||
{ |
||||
sumx = offX; |
||||
sumy = offY; |
||||
countMin = 1; |
||||
} |
||||
} |
||||
|
||||
velx(i, j) = static_cast<float>(sumx) / countMin; |
||||
vely(i, j) = static_cast<float>(sumy) / countMin; |
||||
} |
||||
|
||||
void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious, |
||||
int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream) |
||||
{ |
||||
bindTexture(&tex_prev, prev); |
||||
bindTexture(&tex_curr, curr); |
||||
|
||||
const dim3 block(32, 8); |
||||
const dim3 grid(divUp(velx.cols, block.x), divUp(vely.rows, block.y)); |
||||
|
||||
calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious, |
||||
maxX, maxY, acceptLevel, escapeLevel, ss, ssCount); |
||||
cudaSafeCall( cudaGetLastError() ); |
||||
|
||||
if (stream == 0) |
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
} |
||||
} |
||||
|
||||
///////////////////////////////////////////////////////// |
||||
// Fast approximate version |
||||
|
||||
namespace optflowbm_fast |
||||
{ |
||||
enum |
||||
{ |
||||
CTA_SIZE = 128, |
||||
|
||||
TILE_COLS = 128, |
||||
TILE_ROWS = 32, |
||||
|
||||
STRIDE = CTA_SIZE |
||||
}; |
||||
|
||||
template <typename T> __device__ __forceinline__ int calcDist(T a, T b) |
||||
{ |
||||
return ::abs(a - b); |
||||
} |
||||
|
||||
template <class T> struct FastOptFlowBM |
||||
{ |
||||
|
||||
int search_radius; |
||||
int block_radius; |
||||
|
||||
int search_window; |
||||
int block_window; |
||||
|
||||
PtrStepSz<T> I0; |
||||
PtrStep<T> I1; |
||||
|
||||
mutable PtrStepi buffer; |
||||
|
||||
FastOptFlowBM(int search_window_, int block_window_, |
||||
PtrStepSz<T> I0_, PtrStepSz<T> I1_, |
||||
PtrStepi buffer_) : |
||||
search_radius(search_window_ / 2), block_radius(block_window_ / 2), |
||||
search_window(search_window_), block_window(block_window_), |
||||
I0(I0_), I1(I1_), |
||||
buffer(buffer_) |
||||
{ |
||||
} |
||||
|
||||
__device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const |
||||
{ |
||||
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) |
||||
{ |
||||
dist_sums[index] = 0; |
||||
|
||||
for (int tx = 0; tx < block_window; ++tx) |
||||
col_sums(tx, index) = 0; |
||||
|
||||
int y = index / search_window; |
||||
int x = index - y * search_window; |
||||
|
||||
int ay = i; |
||||
int ax = j; |
||||
|
||||
int by = i + y - search_radius; |
||||
int bx = j + x - search_radius; |
||||
|
||||
for (int tx = -block_radius; tx <= block_radius; ++tx) |
||||
{ |
||||
int col_sum = 0; |
||||
for (int ty = -block_radius; ty <= block_radius; ++ty) |
||||
{ |
||||
int dist = calcDist(I0(ay + ty, ax + tx), I1(by + ty, bx + tx)); |
||||
|
||||
dist_sums[index] += dist; |
||||
col_sum += dist; |
||||
} |
||||
|
||||
col_sums(tx + block_radius, index) = col_sum; |
||||
} |
||||
|
||||
up_col_sums(j, index) = col_sums(block_window - 1, index); |
||||
} |
||||
} |
||||
|
||||
__device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const |
||||
{ |
||||
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) |
||||
{ |
||||
int y = index / search_window; |
||||
int x = index - y * search_window; |
||||
|
||||
int ay = i; |
||||
int ax = j + block_radius; |
||||
|
||||
int by = i + y - search_radius; |
||||
int bx = j + x - search_radius + block_radius; |
||||
|
||||
int col_sum = 0; |
||||
|
||||
for (int ty = -block_radius; ty <= block_radius; ++ty) |
||||
col_sum += calcDist(I0(ay + ty, ax), I1(by + ty, bx)); |
||||
|
||||
dist_sums[index] += col_sum - col_sums(first, index); |
||||
|
||||
col_sums(first, index) = col_sum; |
||||
up_col_sums(j, index) = col_sum; |
||||
} |
||||
} |
||||
|
||||
__device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const |
||||
{ |
||||
int ay = i; |
||||
int ax = j + block_radius; |
||||
|
||||
T a_up = I0(ay - block_radius - 1, ax); |
||||
T a_down = I0(ay + block_radius, ax); |
||||
|
||||
for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE) |
||||
{ |
||||
int y = index / search_window; |
||||
int x = index - y * search_window; |
||||
|
||||
int by = i + y - search_radius; |
||||
int bx = j + x - search_radius + block_radius; |
||||
|
||||
T b_up = I1(by - block_radius - 1, bx); |
||||
T b_down = I1(by + block_radius, bx); |
||||
|
||||
int col_sum = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up); |
||||
|
||||
dist_sums[index] += col_sum - col_sums(first, index); |
||||
col_sums(first, index) = col_sum; |
||||
up_col_sums(j, index) = col_sum; |
||||
} |
||||
} |
||||
|
||||
__device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const |
||||
{ |
||||
int bestDist = numeric_limits<int>::max(); |
||||
int bestInd = -1; |
||||
|
||||
for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) |
||||
{ |
||||
int curDist = dist_sums[index]; |
||||
if (curDist < bestDist) |
||||
{ |
||||
bestDist = curDist; |
||||
bestInd = index; |
||||
} |
||||
} |
||||
|
||||
__shared__ int cta_dist_buffer[CTA_SIZE]; |
||||
__shared__ int cta_ind_buffer[CTA_SIZE]; |
||||
|
||||
reduceKeyVal<CTA_SIZE>(cta_dist_buffer, bestDist, cta_ind_buffer, bestInd, threadIdx.x, less<int>()); |
||||
|
||||
if (threadIdx.x == 0) |
||||
{ |
||||
int y = bestInd / search_window; |
||||
int x = bestInd - y * search_window; |
||||
|
||||
velx = x - search_radius; |
||||
vely = y - search_radius; |
||||
} |
||||
} |
||||
|
||||
__device__ __forceinline__ void operator()(PtrStepf velx, PtrStepf vely) const |
||||
{ |
||||
int tbx = blockIdx.x * TILE_COLS; |
||||
int tby = blockIdx.y * TILE_ROWS; |
||||
|
||||
int tex = ::min(tbx + TILE_COLS, I0.cols); |
||||
int tey = ::min(tby + TILE_ROWS, I0.rows); |
||||
|
||||
PtrStepi col_sums; |
||||
col_sums.data = buffer.ptr(I0.cols + blockIdx.x * block_window) + blockIdx.y * search_window * search_window; |
||||
col_sums.step = buffer.step; |
||||
|
||||
PtrStepi up_col_sums; |
||||
up_col_sums.data = buffer.data + blockIdx.y * search_window * search_window; |
||||
up_col_sums.step = buffer.step; |
||||
|
||||
extern __shared__ int dist_sums[]; //search_window * search_window |
||||
|
||||
int first = 0; |
||||
|
||||
for (int i = tby; i < tey; ++i) |
||||
{ |
||||
for (int j = tbx; j < tex; ++j) |
||||
{ |
||||
__syncthreads(); |
||||
|
||||
if (j == tbx) |
||||
{ |
||||
initSums_BruteForce(i, j, dist_sums, col_sums, up_col_sums); |
||||
first = 0; |
||||
} |
||||
else |
||||
{ |
||||
if (i == tby) |
||||
shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums); |
||||
else |
||||
shiftRight_UpSums(i, j, first, dist_sums, col_sums, up_col_sums); |
||||
|
||||
first = (first + 1) % block_window; |
||||
} |
||||
|
||||
__syncthreads(); |
||||
|
||||
convolve_window(i, j, dist_sums, velx(i, j), vely(i, j)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
}; |
||||
|
||||
template<typename T> __global__ void optflowbm_fast_kernel(const FastOptFlowBM<T> fbm, PtrStepf velx, PtrStepf vely) |
||||
{ |
||||
fbm(velx, vely); |
||||
} |
||||
|
||||
void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows) |
||||
{ |
||||
dim3 grid(divUp(src_cols, TILE_COLS), divUp(src_rows, TILE_ROWS)); |
||||
|
||||
buffer_cols = search_window * search_window * grid.y; |
||||
buffer_rows = src_cols + block_window * grid.x; |
||||
} |
||||
|
||||
template <typename T> |
||||
void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream) |
||||
{ |
||||
FastOptFlowBM<T> fbm(search_window, block_window, I0, I1, buffer); |
||||
|
||||
dim3 block(CTA_SIZE, 1); |
||||
dim3 grid(divUp(I0.cols, TILE_COLS), divUp(I0.rows, TILE_ROWS)); |
||||
|
||||
size_t smem = search_window * search_window * sizeof(int); |
||||
|
||||
optflowbm_fast_kernel<<<grid, block, smem, stream>>>(fbm, velx, vely); |
||||
cudaSafeCall ( cudaGetLastError () ); |
||||
|
||||
if (stream == 0) |
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
} |
||||
|
||||
template void calc<uchar>(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // !defined CUDA_DISABLER |
@ -0,0 +1,332 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or bpied warranties, including, but not limited to, the bpied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/gpu/device/common.hpp" |
||||
#include "opencv2/gpu/device/border_interpolate.hpp" |
||||
#include "opencv2/gpu/device/limits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::device; |
||||
|
||||
//////////////////////////////////////////////////////////// |
||||
// centeredGradient |
||||
|
||||
namespace tvl1flow |
||||
{ |
||||
__global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy) |
||||
{ |
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x; |
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y; |
||||
|
||||
if (x >= src.cols || y >= src.rows) |
||||
return; |
||||
|
||||
dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0))); |
||||
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x)); |
||||
} |
||||
|
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy) |
||||
{ |
||||
const dim3 block(32, 8); |
||||
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); |
||||
|
||||
centeredGradientKernel<<<grid, block>>>(src, dx, dy); |
||||
cudaSafeCall( cudaGetLastError() ); |
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
} |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////// |
||||
// warpBackward |
||||
|
||||
namespace tvl1flow |
||||
{ |
||||
static __device__ __forceinline__ float bicubicCoeff(float x_) |
||||
{ |
||||
float x = fabsf(x_); |
||||
if (x <= 1.0f) |
||||
{ |
||||
return x * x * (1.5f * x - 2.5f) + 1.0f; |
||||
} |
||||
else if (x < 2.0f) |
||||
{ |
||||
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; |
||||
} |
||||
else |
||||
{ |
||||
return 0.0f; |
||||
} |
||||
} |
||||
|
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp); |
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp); |
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp); |
||||
|
||||
__global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho) |
||||
{ |
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x; |
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y; |
||||
|
||||
if (x >= I0.cols || y >= I0.rows) |
||||
return; |
||||
|
||||
const float u1Val = u1(y, x); |
||||
const float u2Val = u2(y, x); |
||||
|
||||
const float wx = x + u1Val; |
||||
const float wy = y + u2Val; |
||||
|
||||
const int xmin = ::ceilf(wx - 2.0f); |
||||
const int xmax = ::floorf(wx + 2.0f); |
||||
|
||||
const int ymin = ::ceilf(wy - 2.0f); |
||||
const int ymax = ::floorf(wy + 2.0f); |
||||
|
||||
float sum = 0.0f; |
||||
float sumx = 0.0f; |
||||
float sumy = 0.0f; |
||||
float wsum = 0.0f; |
||||
|
||||
for (int cy = ymin; cy <= ymax; ++cy) |
||||
{ |
||||
for (int cx = xmin; cx <= xmax; ++cx) |
||||
{ |
||||
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); |
||||
|
||||
sum += w * tex2D(tex_I1 , cx, cy); |
||||
sumx += w * tex2D(tex_I1x, cx, cy); |
||||
sumy += w * tex2D(tex_I1y, cx, cy); |
||||
|
||||
wsum += w; |
||||
} |
||||
} |
||||
|
||||
const float coeff = 1.0f / wsum; |
||||
|
||||
const float I1wVal = sum * coeff; |
||||
const float I1wxVal = sumx * coeff; |
||||
const float I1wyVal = sumy * coeff; |
||||
|
||||
I1w(y, x) = I1wVal; |
||||
I1wx(y, x) = I1wxVal; |
||||
I1wy(y, x) = I1wyVal; |
||||
|
||||
const float Ix2 = I1wxVal * I1wxVal; |
||||
const float Iy2 = I1wyVal * I1wyVal; |
||||
|
||||
// store the |Grad(I1)|^2 |
||||
grad(y, x) = Ix2 + Iy2; |
||||
|
||||
// compute the constant part of the rho function |
||||
const float I0Val = I0(y, x); |
||||
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; |
||||
} |
||||
|
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho) |
||||
{ |
||||
const dim3 block(32, 8); |
||||
const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y)); |
||||
|
||||
bindTexture(&tex_I1 , I1); |
||||
bindTexture(&tex_I1x, I1x); |
||||
bindTexture(&tex_I1y, I1y); |
||||
|
||||
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); |
||||
cudaSafeCall( cudaGetLastError() ); |
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
} |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////// |
||||
// estimateU |
||||
|
||||
namespace tvl1flow |
||||
{ |
||||
__device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x) |
||||
{ |
||||
if (x > 0 && y > 0) |
||||
{ |
||||
const float v1x = v1(y, x) - v1(y, x - 1); |
||||
const float v2y = v2(y, x) - v2(y - 1, x); |
||||
return v1x + v2y; |
||||
} |
||||
else |
||||
{ |
||||
if (y > 0) |
||||
return v1(y, 0) + v2(y, 0) - v2(y - 1, 0); |
||||
else |
||||
{ |
||||
if (x > 0) |
||||
return v1(0, x) - v1(0, x - 1) + v2(0, x); |
||||
else |
||||
return v1(0, 0) + v2(0, 0); |
||||
} |
||||
} |
||||
} |
||||
|
||||
__global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy, |
||||
const PtrStepf grad, const PtrStepf rho_c, |
||||
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, |
||||
PtrStepf u1, PtrStepf u2, PtrStepf error, |
||||
const float l_t, const float theta) |
||||
{ |
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x; |
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y; |
||||
|
||||
if (x >= I1wx.cols || y >= I1wx.rows) |
||||
return; |
||||
|
||||
const float I1wxVal = I1wx(y, x); |
||||
const float I1wyVal = I1wy(y, x); |
||||
const float gradVal = grad(y, x); |
||||
const float u1OldVal = u1(y, x); |
||||
const float u2OldVal = u2(y, x); |
||||
|
||||
const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); |
||||
|
||||
// estimate the values of the variable (v1, v2) (thresholding operator TH) |
||||
|
||||
float d1 = 0.0f; |
||||
float d2 = 0.0f; |
||||
|
||||
if (rho < -l_t * gradVal) |
||||
{ |
||||
d1 = l_t * I1wxVal; |
||||
d2 = l_t * I1wyVal; |
||||
} |
||||
else if (rho > l_t * gradVal) |
||||
{ |
||||
d1 = -l_t * I1wxVal; |
||||
d2 = -l_t * I1wyVal; |
||||
} |
||||
else if (gradVal > numeric_limits<float>::epsilon()) |
||||
{ |
||||
const float fi = -rho / gradVal; |
||||
d1 = fi * I1wxVal; |
||||
d2 = fi * I1wyVal; |
||||
} |
||||
|
||||
const float v1 = u1OldVal + d1; |
||||
const float v2 = u2OldVal + d2; |
||||
|
||||
// compute the divergence of the dual variable (p1, p2) |
||||
|
||||
const float div_p1 = divergence(p11, p12, y, x); |
||||
const float div_p2 = divergence(p21, p22, y, x); |
||||
|
||||
// estimate the values of the optical flow (u1, u2) |
||||
|
||||
const float u1NewVal = v1 + theta * div_p1; |
||||
const float u2NewVal = v2 + theta * div_p2; |
||||
|
||||
u1(y, x) = u1NewVal; |
||||
u2(y, x) = u2NewVal; |
||||
|
||||
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); |
||||
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); |
||||
error(y, x) = n1 + n2; |
||||
} |
||||
|
||||
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, |
||||
PtrStepSzf grad, PtrStepSzf rho_c, |
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, |
||||
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, |
||||
float l_t, float theta) |
||||
{ |
||||
const dim3 block(32, 8); |
||||
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); |
||||
|
||||
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); |
||||
cudaSafeCall( cudaGetLastError() ); |
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
} |
||||
} |
||||
|
||||
//////////////////////////////////////////////////////////// |
||||
// estimateDualVariables |
||||
|
||||
namespace tvl1flow |
||||
{ |
||||
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut) |
||||
{ |
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x; |
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y; |
||||
|
||||
if (x >= u1.cols || y >= u1.rows) |
||||
return; |
||||
|
||||
const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x); |
||||
const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x); |
||||
|
||||
const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x); |
||||
const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x); |
||||
|
||||
const float g1 = ::hypotf(u1x, u1y); |
||||
const float g2 = ::hypotf(u2x, u2y); |
||||
|
||||
const float ng1 = 1.0f + taut * g1; |
||||
const float ng2 = 1.0f + taut * g2; |
||||
|
||||
p11(y, x) = (p11(y, x) + taut * u1x) / ng1; |
||||
p12(y, x) = (p12(y, x) + taut * u1y) / ng1; |
||||
p21(y, x) = (p21(y, x) + taut * u2x) / ng2; |
||||
p22(y, x) = (p22(y, x) + taut * u2y) / ng2; |
||||
} |
||||
|
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut) |
||||
{ |
||||
const dim3 block(32, 8); |
||||
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y)); |
||||
|
||||
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut); |
||||
cudaSafeCall( cudaGetLastError() ); |
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
} |
||||
} |
||||
|
||||
#endif // !defined CUDA_DISABLER |
@ -0,0 +1,243 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER) |
||||
|
||||
void cv::gpu::calcOpticalFlowBM(const GpuMat&, const GpuMat&, Size, Size, Size, bool, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } |
||||
|
||||
void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); } |
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
namespace optflowbm |
||||
{ |
||||
void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious, |
||||
int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream); |
||||
} |
||||
|
||||
void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blockSize, Size shiftSize, Size maxRange, bool usePrevious, GpuMat& velx, GpuMat& vely, GpuMat& buf, Stream& st) |
||||
{ |
||||
CV_Assert( prev.type() == CV_8UC1 ); |
||||
CV_Assert( curr.size() == prev.size() && curr.type() == prev.type() ); |
||||
|
||||
const Size velSize((prev.cols - blockSize.width + shiftSize.width) / shiftSize.width, |
||||
(prev.rows - blockSize.height + shiftSize.height) / shiftSize.height); |
||||
|
||||
velx.create(velSize, CV_32FC1); |
||||
vely.create(velSize, CV_32FC1); |
||||
|
||||
// scanning scheme coordinates
|
||||
vector<short2> ss((2 * maxRange.width + 1) * (2 * maxRange.height + 1)); |
||||
int ssCount = 0; |
||||
|
||||
// Calculate scanning scheme
|
||||
const int minCount = std::min(maxRange.width, maxRange.height); |
||||
|
||||
// use spiral search pattern
|
||||
//
|
||||
// 9 10 11 12
|
||||
// 8 1 2 13
|
||||
// 7 * 3 14
|
||||
// 6 5 4 15
|
||||
//... 20 19 18 17
|
||||
//
|
||||
|
||||
for (int i = 0; i < minCount; ++i) |
||||
{ |
||||
// four cycles along sides
|
||||
int x = -i - 1, y = x; |
||||
|
||||
// upper side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount) |
||||
{ |
||||
ss[ssCount].x = ++x; |
||||
ss[ssCount].y = y; |
||||
} |
||||
|
||||
// right side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount) |
||||
{ |
||||
ss[ssCount].x = x; |
||||
ss[ssCount].y = ++y; |
||||
} |
||||
|
||||
// bottom side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount) |
||||
{ |
||||
ss[ssCount].x = --x; |
||||
ss[ssCount].y = y; |
||||
} |
||||
|
||||
// left side
|
||||
for (int j = -i; j <= i + 1; ++j, ++ssCount) |
||||
{ |
||||
ss[ssCount].x = x; |
||||
ss[ssCount].y = --y; |
||||
} |
||||
} |
||||
|
||||
// the rest part
|
||||
if (maxRange.width < maxRange.height) |
||||
{ |
||||
const int xleft = -minCount; |
||||
|
||||
// cycle by neighbor rings
|
||||
for (int i = minCount; i < maxRange.height; ++i) |
||||
{ |
||||
// two cycles by x
|
||||
int y = -(i + 1); |
||||
int x = xleft; |
||||
|
||||
// upper side
|
||||
for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x) |
||||
{ |
||||
ss[ssCount].x = x; |
||||
ss[ssCount].y = y; |
||||
} |
||||
|
||||
x = xleft; |
||||
y = -y; |
||||
|
||||
// bottom side
|
||||
for (int j = -maxRange.width; j <= maxRange.width; ++j, ++ssCount, ++x) |
||||
{ |
||||
ss[ssCount].x = x; |
||||
ss[ssCount].y = y; |
||||
} |
||||
} |
||||
} |
||||
else if (maxRange.width > maxRange.height) |
||||
{ |
||||
const int yupper = -minCount; |
||||
|
||||
// cycle by neighbor rings
|
||||
for (int i = minCount; i < maxRange.width; ++i) |
||||
{ |
||||
// two cycles by y
|
||||
int x = -(i + 1); |
||||
int y = yupper; |
||||
|
||||
// left side
|
||||
for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y) |
||||
{ |
||||
ss[ssCount].x = x; |
||||
ss[ssCount].y = y; |
||||
} |
||||
|
||||
y = yupper; |
||||
x = -x; |
||||
|
||||
// right side
|
||||
for (int j = -maxRange.height; j <= maxRange.height; ++j, ++ssCount, ++y) |
||||
{ |
||||
ss[ssCount].x = x; |
||||
ss[ssCount].y = y; |
||||
} |
||||
} |
||||
} |
||||
|
||||
const cudaStream_t stream = StreamAccessor::getStream(st); |
||||
|
||||
ensureSizeIsEnough(1, ssCount, CV_16SC2, buf); |
||||
if (stream == 0) |
||||
cudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) ); |
||||
else |
||||
cudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) ); |
||||
|
||||
const int maxX = prev.cols - blockSize.width; |
||||
const int maxY = prev.rows - blockSize.height; |
||||
|
||||
const int SMALL_DIFF = 2; |
||||
const int BIG_DIFF = 128; |
||||
|
||||
const int blSize = blockSize.area(); |
||||
const int acceptLevel = blSize * SMALL_DIFF; |
||||
const int escapeLevel = blSize * BIG_DIFF; |
||||
|
||||
optflowbm::calc(prev, curr, velx, vely, |
||||
make_int2(blockSize.width, blockSize.height), make_int2(shiftSize.width, shiftSize.height), usePrevious, |
||||
maxX, maxY, acceptLevel, escapeLevel, buf.ptr<short2>(), ssCount, stream); |
||||
} |
||||
|
||||
namespace optflowbm_fast |
||||
{ |
||||
void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows); |
||||
|
||||
template <typename T> |
||||
void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream); |
||||
} |
||||
|
||||
void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream) |
||||
{ |
||||
CV_Assert( I0.type() == CV_8UC1 ); |
||||
CV_Assert( I1.size() == I0.size() && I1.type() == I0.type() ); |
||||
|
||||
int border_size = search_window / 2 + block_window / 2; |
||||
Size esize = I0.size() + Size(border_size, border_size) * 2; |
||||
|
||||
ensureSizeIsEnough(esize, I0.type(), extended_I0); |
||||
ensureSizeIsEnough(esize, I0.type(), extended_I1); |
||||
|
||||
copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream); |
||||
copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream); |
||||
|
||||
GpuMat I0_hdr = extended_I0(Rect(Point2i(border_size, border_size), I0.size())); |
||||
GpuMat I1_hdr = extended_I1(Rect(Point2i(border_size, border_size), I0.size())); |
||||
|
||||
int bcols, brows; |
||||
optflowbm_fast::get_buffer_size(I0.cols, I0.rows, search_window, block_window, bcols, brows); |
||||
|
||||
ensureSizeIsEnough(brows, bcols, CV_32SC1, buffer); |
||||
|
||||
flowx.create(I0.size(), CV_32FC1); |
||||
flowy.create(I0.size(), CV_32FC1); |
||||
|
||||
optflowbm_fast::calc<uchar>(I0_hdr, I1_hdr, flowx, flowy, buffer, search_window, block_window, StreamAccessor::getStream(stream)); |
||||
} |
||||
|
||||
#endif // HAVE_CUDA
|
@ -0,0 +1,256 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER) |
||||
|
||||
cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() { throw_nogpu(); } |
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } |
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() {} |
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } |
||||
|
||||
#else |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() |
||||
{ |
||||
tau = 0.25; |
||||
lambda = 0.15; |
||||
theta = 0.3; |
||||
nscales = 5; |
||||
warps = 5; |
||||
epsilon = 0.01; |
||||
iterations = 300; |
||||
useInitialFlow = false; |
||||
} |
||||
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy) |
||||
{ |
||||
CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); |
||||
CV_Assert( I0.size() == I1.size() ); |
||||
CV_Assert( I0.type() == I1.type() ); |
||||
CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); |
||||
CV_Assert( nscales > 0 ); |
||||
|
||||
// allocate memory for the pyramid structure
|
||||
I0s.resize(nscales); |
||||
I1s.resize(nscales); |
||||
u1s.resize(nscales); |
||||
u2s.resize(nscales); |
||||
|
||||
I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); |
||||
I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); |
||||
|
||||
if (!useInitialFlow) |
||||
{ |
||||
flowx.create(I0.size(), CV_32FC1); |
||||
flowy.create(I0.size(), CV_32FC1); |
||||
} |
||||
|
||||
u1s[0] = flowx; |
||||
u2s[0] = flowy; |
||||
|
||||
I1x_buf.create(I0.size(), CV_32FC1); |
||||
I1y_buf.create(I0.size(), CV_32FC1); |
||||
|
||||
I1w_buf.create(I0.size(), CV_32FC1); |
||||
I1wx_buf.create(I0.size(), CV_32FC1); |
||||
I1wy_buf.create(I0.size(), CV_32FC1); |
||||
|
||||
grad_buf.create(I0.size(), CV_32FC1); |
||||
rho_c_buf.create(I0.size(), CV_32FC1); |
||||
|
||||
p11_buf.create(I0.size(), CV_32FC1); |
||||
p12_buf.create(I0.size(), CV_32FC1); |
||||
p21_buf.create(I0.size(), CV_32FC1); |
||||
p22_buf.create(I0.size(), CV_32FC1); |
||||
|
||||
diff_buf.create(I0.size(), CV_32FC1); |
||||
|
||||
// create the scales
|
||||
for (int s = 1; s < nscales; ++s) |
||||
{ |
||||
gpu::pyrDown(I0s[s - 1], I0s[s]); |
||||
gpu::pyrDown(I1s[s - 1], I1s[s]); |
||||
|
||||
if (I0s[s].cols < 16 || I0s[s].rows < 16) |
||||
{ |
||||
nscales = s; |
||||
break; |
||||
} |
||||
|
||||
if (useInitialFlow) |
||||
{ |
||||
gpu::pyrDown(u1s[s - 1], u1s[s]); |
||||
gpu::pyrDown(u2s[s - 1], u2s[s]); |
||||
|
||||
gpu::multiply(u1s[s], Scalar::all(0.5), u1s[s]); |
||||
gpu::multiply(u2s[s], Scalar::all(0.5), u2s[s]); |
||||
} |
||||
} |
||||
|
||||
// pyramidal structure for computing the optical flow
|
||||
for (int s = nscales - 1; s >= 0; --s) |
||||
{ |
||||
// compute the optical flow at the current scale
|
||||
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); |
||||
|
||||
// if this was the last scale, finish now
|
||||
if (s == 0) |
||||
break; |
||||
|
||||
// otherwise, upsample the optical flow
|
||||
|
||||
// zoom the optical flow for the next finer scale
|
||||
gpu::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); |
||||
gpu::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); |
||||
|
||||
// scale the optical flow with the appropriate zoom factor
|
||||
gpu::multiply(u1s[s - 1], Scalar::all(2), u1s[s - 1]); |
||||
gpu::multiply(u2s[s - 1], Scalar::all(2), u2s[s - 1]); |
||||
} |
||||
} |
||||
|
||||
namespace tvl1flow |
||||
{ |
||||
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy); |
||||
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho); |
||||
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, |
||||
PtrStepSzf grad, PtrStepSzf rho_c, |
||||
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, |
||||
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, |
||||
float l_t, float theta); |
||||
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut); |
||||
} |
||||
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2) |
||||
{ |
||||
using namespace tvl1flow; |
||||
|
||||
const double scaledEpsilon = epsilon * epsilon * I0.size().area(); |
||||
|
||||
CV_DbgAssert( I1.size() == I0.size() ); |
||||
CV_DbgAssert( I1.type() == I0.type() ); |
||||
CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); |
||||
CV_DbgAssert( u2.size() == u1.size() ); |
||||
|
||||
if (u1.empty()) |
||||
{ |
||||
u1.create(I0.size(), CV_32FC1); |
||||
u1.setTo(Scalar::all(0)); |
||||
|
||||
u2.create(I0.size(), CV_32FC1); |
||||
u2.setTo(Scalar::all(0)); |
||||
} |
||||
|
||||
GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
centeredGradient(I1, I1x, I1y); |
||||
|
||||
GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
p11.setTo(Scalar::all(0)); |
||||
p12.setTo(Scalar::all(0)); |
||||
p21.setTo(Scalar::all(0)); |
||||
p22.setTo(Scalar::all(0)); |
||||
|
||||
GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
const float l_t = static_cast<float>(lambda * theta); |
||||
const float taut = static_cast<float>(tau / theta); |
||||
|
||||
for (int warpings = 0; warpings < warps; ++warpings) |
||||
{ |
||||
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); |
||||
|
||||
double error = numeric_limits<double>::max(); |
||||
for (int n = 0; error > scaledEpsilon && n < iterations; ++n) |
||||
{ |
||||
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta)); |
||||
|
||||
error = gpu::sum(diff, norm_buf)[0]; |
||||
|
||||
estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); |
||||
} |
||||
} |
||||
} |
||||
|
||||
void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() |
||||
{ |
||||
I0s.clear(); |
||||
I1s.clear(); |
||||
u1s.clear(); |
||||
u2s.clear(); |
||||
|
||||
I1x_buf.release(); |
||||
I1y_buf.release(); |
||||
|
||||
I1w_buf.release(); |
||||
I1wx_buf.release(); |
||||
I1wy_buf.release(); |
||||
|
||||
grad_buf.release(); |
||||
rho_c_buf.release(); |
||||
|
||||
p11_buf.release(); |
||||
p12_buf.release(); |
||||
p21_buf.release(); |
||||
p22_buf.release(); |
||||
|
||||
diff_buf.release(); |
||||
norm_buf.release(); |
||||
} |
||||
|
||||
#endif // !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
@ -0,0 +1,130 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other GpuMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or bpied warranties, including, but not limited to, the bpied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
#if CUDA_VERSION >= 5000 |
||||
|
||||
struct Async : testing::TestWithParam<cv::gpu::DeviceInfo> |
||||
{ |
||||
cv::gpu::CudaMem src; |
||||
cv::gpu::GpuMat d_src; |
||||
|
||||
cv::gpu::CudaMem dst; |
||||
cv::gpu::GpuMat d_dst; |
||||
|
||||
virtual void SetUp() |
||||
{ |
||||
cv::gpu::DeviceInfo devInfo = GetParam(); |
||||
cv::gpu::setDevice(devInfo.deviceID()); |
||||
|
||||
cv::Mat m = randomMat(cv::Size(128, 128), CV_8UC1); |
||||
src.create(m.size(), m.type(), cv::gpu::CudaMem::ALLOC_PAGE_LOCKED); |
||||
m.copyTo(src.createMatHeader()); |
||||
} |
||||
}; |
||||
|
||||
void checkMemSet(cv::gpu::Stream&, int status, void* userData) |
||||
{ |
||||
ASSERT_EQ(cudaSuccess, status); |
||||
|
||||
Async* test = reinterpret_cast<Async*>(userData); |
||||
|
||||
cv::Mat src = test->src; |
||||
cv::Mat dst = test->dst; |
||||
|
||||
cv::Mat dst_gold = cv::Mat::zeros(src.size(), src.type()); |
||||
|
||||
ASSERT_MAT_NEAR(dst_gold, dst, 0); |
||||
} |
||||
|
||||
GPU_TEST_P(Async, MemSet) |
||||
{ |
||||
cv::gpu::Stream stream; |
||||
|
||||
d_dst.upload(src); |
||||
|
||||
stream.enqueueMemSet(d_dst, cv::Scalar::all(0)); |
||||
stream.enqueueDownload(d_dst, dst); |
||||
|
||||
Async* test = this; |
||||
stream.enqueueHostCallback(checkMemSet, test); |
||||
|
||||
stream.waitForCompletion(); |
||||
} |
||||
|
||||
void checkConvert(cv::gpu::Stream&, int status, void* userData) |
||||
{ |
||||
ASSERT_EQ(cudaSuccess, status); |
||||
|
||||
Async* test = reinterpret_cast<Async*>(userData); |
||||
|
||||
cv::Mat src = test->src; |
||||
cv::Mat dst = test->dst; |
||||
|
||||
cv::Mat dst_gold; |
||||
src.convertTo(dst_gold, CV_32S); |
||||
|
||||
ASSERT_MAT_NEAR(dst_gold, dst, 0); |
||||
} |
||||
|
||||
GPU_TEST_P(Async, Convert) |
||||
{ |
||||
cv::gpu::Stream stream; |
||||
|
||||
stream.enqueueUpload(src, d_src); |
||||
stream.enqueueConvert(d_src, d_dst, CV_32S); |
||||
stream.enqueueDownload(d_dst, dst); |
||||
|
||||
Async* test = this; |
||||
stream.enqueueHostCallback(checkConvert, test); |
||||
|
||||
stream.waitForCompletion(); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Stream, Async, ALL_DEVICES); |
||||
|
||||
#endif |
||||
|
||||
#endif // HAVE_CUDA
|
@ -0,0 +1,30 @@ |
||||
#include "perf_precomp.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace perf; |
||||
|
||||
typedef TestBaseWithParam< pair<string, string> > ImagePair; |
||||
|
||||
pair<string, string> impair(const char* im1, const char* im2) |
||||
{ |
||||
return make_pair(string(im1), string(im2)); |
||||
} |
||||
|
||||
PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, testing::Values(impair("cv/optflow/RubberWhale1.png", "cv/optflow/RubberWhale2.png"))) |
||||
{ |
||||
declare.time(40); |
||||
|
||||
Mat frame1 = imread(getDataPath(GetParam().first), IMREAD_GRAYSCALE); |
||||
Mat frame2 = imread(getDataPath(GetParam().second), IMREAD_GRAYSCALE); |
||||
ASSERT_FALSE(frame1.empty()); |
||||
ASSERT_FALSE(frame2.empty()); |
||||
|
||||
Mat flow; |
||||
|
||||
Ptr<DenseOpticalFlow> tvl1 = createOptFlow_DualTVL1(); |
||||
|
||||
TEST_CYCLE_N(10) tvl1->calc(frame1, frame2, flow); |
||||
|
||||
SANITY_CHECK(flow, 0.5); |
||||
} |
@ -0,0 +1,937 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
//
|
||||
// This implementation is based on Javier Sánchez Pérez <jsanchez@dis.ulpgc.es> implementation.
|
||||
// Original BSD license:
|
||||
//
|
||||
// Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
*/ |
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
|
||||
namespace { |
||||
|
||||
class OpticalFlowDual_TVL1 : public DenseOpticalFlow |
||||
{ |
||||
public: |
||||
OpticalFlowDual_TVL1(); |
||||
|
||||
void calc(InputArray I0, InputArray I1, InputOutputArray flow); |
||||
void collectGarbage(); |
||||
|
||||
AlgorithmInfo* info() const; |
||||
|
||||
protected: |
||||
double tau; |
||||
double lambda; |
||||
double theta; |
||||
int nscales; |
||||
int warps; |
||||
double epsilon; |
||||
int iterations; |
||||
bool useInitialFlow; |
||||
|
||||
private: |
||||
void procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2); |
||||
|
||||
std::vector<Mat_<float> > I0s; |
||||
std::vector<Mat_<float> > I1s; |
||||
std::vector<Mat_<float> > u1s; |
||||
std::vector<Mat_<float> > u2s; |
||||
|
||||
Mat_<float> I1x_buf; |
||||
Mat_<float> I1y_buf; |
||||
|
||||
Mat_<float> flowMap1_buf; |
||||
Mat_<float> flowMap2_buf; |
||||
|
||||
Mat_<float> I1w_buf; |
||||
Mat_<float> I1wx_buf; |
||||
Mat_<float> I1wy_buf; |
||||
|
||||
Mat_<float> grad_buf; |
||||
Mat_<float> rho_c_buf; |
||||
|
||||
Mat_<float> v1_buf; |
||||
Mat_<float> v2_buf; |
||||
|
||||
Mat_<float> p11_buf; |
||||
Mat_<float> p12_buf; |
||||
Mat_<float> p21_buf; |
||||
Mat_<float> p22_buf; |
||||
|
||||
Mat_<float> div_p1_buf; |
||||
Mat_<float> div_p2_buf; |
||||
|
||||
Mat_<float> u1x_buf; |
||||
Mat_<float> u1y_buf; |
||||
Mat_<float> u2x_buf; |
||||
Mat_<float> u2y_buf; |
||||
}; |
||||
|
||||
OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() |
||||
{ |
||||
tau = 0.25; |
||||
lambda = 0.15; |
||||
theta = 0.3; |
||||
nscales = 5; |
||||
warps = 5; |
||||
epsilon = 0.01; |
||||
iterations = 300; |
||||
useInitialFlow = false; |
||||
} |
||||
|
||||
void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow) |
||||
{ |
||||
Mat I0 = _I0.getMat(); |
||||
Mat I1 = _I1.getMat(); |
||||
|
||||
CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); |
||||
CV_Assert( I0.size() == I1.size() ); |
||||
CV_Assert( I0.type() == I1.type() ); |
||||
CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) ); |
||||
CV_Assert( nscales > 0 ); |
||||
|
||||
// allocate memory for the pyramid structure
|
||||
I0s.resize(nscales); |
||||
I1s.resize(nscales); |
||||
u1s.resize(nscales); |
||||
u2s.resize(nscales); |
||||
|
||||
I0.convertTo(I0s[0], I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0); |
||||
I1.convertTo(I1s[0], I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0); |
||||
|
||||
if (useInitialFlow) |
||||
{ |
||||
u1s[0].create(I0.size()); |
||||
u2s[0].create(I0.size()); |
||||
|
||||
Mat_<float> mv[] = {u1s[0], u2s[0]}; |
||||
|
||||
split(_flow.getMat(), mv); |
||||
} |
||||
|
||||
I1x_buf.create(I0.size()); |
||||
I1y_buf.create(I0.size()); |
||||
|
||||
flowMap1_buf.create(I0.size()); |
||||
flowMap2_buf.create(I0.size()); |
||||
|
||||
I1w_buf.create(I0.size()); |
||||
I1wx_buf.create(I0.size()); |
||||
I1wy_buf.create(I0.size()); |
||||
|
||||
grad_buf.create(I0.size()); |
||||
rho_c_buf.create(I0.size()); |
||||
|
||||
v1_buf.create(I0.size()); |
||||
v2_buf.create(I0.size()); |
||||
|
||||
p11_buf.create(I0.size()); |
||||
p12_buf.create(I0.size()); |
||||
p21_buf.create(I0.size()); |
||||
p22_buf.create(I0.size()); |
||||
|
||||
div_p1_buf.create(I0.size()); |
||||
div_p2_buf.create(I0.size()); |
||||
|
||||
u1x_buf.create(I0.size()); |
||||
u1y_buf.create(I0.size()); |
||||
u2x_buf.create(I0.size()); |
||||
u2y_buf.create(I0.size()); |
||||
|
||||
// create the scales
|
||||
for (int s = 1; s < nscales; ++s) |
||||
{ |
||||
pyrDown(I0s[s - 1], I0s[s]); |
||||
pyrDown(I1s[s - 1], I1s[s]); |
||||
|
||||
if (I0s[s].cols < 16 || I0s[s].rows < 16) |
||||
{ |
||||
nscales = s; |
||||
break; |
||||
} |
||||
|
||||
if (useInitialFlow) |
||||
{ |
||||
pyrDown(u1s[s - 1], u1s[s]); |
||||
pyrDown(u2s[s - 1], u2s[s]); |
||||
|
||||
multiply(u1s[s], Scalar::all(0.5), u1s[s]); |
||||
multiply(u2s[s], Scalar::all(0.5), u2s[s]); |
||||
} |
||||
} |
||||
|
||||
// pyramidal structure for computing the optical flow
|
||||
for (int s = nscales - 1; s >= 0; --s) |
||||
{ |
||||
// compute the optical flow at the current scale
|
||||
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); |
||||
|
||||
// if this was the last scale, finish now
|
||||
if (s == 0) |
||||
break; |
||||
|
||||
// otherwise, upsample the optical flow
|
||||
|
||||
// zoom the optical flow for the next finer scale
|
||||
resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); |
||||
resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); |
||||
|
||||
// scale the optical flow with the appropriate zoom factor
|
||||
multiply(u1s[s - 1], Scalar::all(2), u1s[s - 1]); |
||||
multiply(u2s[s - 1], Scalar::all(2), u2s[s - 1]); |
||||
} |
||||
|
||||
Mat uxy[] = {u1s[0], u2s[0]}; |
||||
merge(uxy, 2, _flow); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// buildFlowMap
|
||||
|
||||
struct BuildFlowMapBody : ParallelLoopBody |
||||
{ |
||||
void operator() (const Range& range) const; |
||||
|
||||
Mat_<float> u1; |
||||
Mat_<float> u2; |
||||
mutable Mat_<float> map1; |
||||
mutable Mat_<float> map2; |
||||
}; |
||||
|
||||
void BuildFlowMapBody::operator() (const Range& range) const |
||||
{ |
||||
for (int y = range.start; y < range.end; ++y) |
||||
{ |
||||
const float* u1Row = u1[y]; |
||||
const float* u2Row = u2[y]; |
||||
|
||||
float* map1Row = map1[y]; |
||||
float* map2Row = map2[y]; |
||||
|
||||
for (int x = 0; x < u1.cols; ++x) |
||||
{ |
||||
map1Row[x] = x + u1Row[x]; |
||||
map2Row[x] = y + u2Row[x]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
void buildFlowMap(const Mat_<float>& u1, const Mat_<float>& u2, Mat_<float>& map1, Mat_<float>& map2) |
||||
{ |
||||
CV_DbgAssert( u2.size() == u1.size() ); |
||||
CV_DbgAssert( map1.size() == u1.size() ); |
||||
CV_DbgAssert( map2.size() == u1.size() ); |
||||
|
||||
BuildFlowMapBody body; |
||||
|
||||
body.u1 = u1; |
||||
body.u2 = u2; |
||||
body.map1 = map1; |
||||
body.map2 = map2; |
||||
|
||||
parallel_for_(Range(0, u1.rows), body); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// centeredGradient
|
||||
|
||||
struct CenteredGradientBody : ParallelLoopBody |
||||
{ |
||||
void operator() (const Range& range) const; |
||||
|
||||
Mat_<float> src; |
||||
mutable Mat_<float> dx; |
||||
mutable Mat_<float> dy; |
||||
}; |
||||
|
||||
void CenteredGradientBody::operator() (const Range& range) const |
||||
{ |
||||
const int last_col = src.cols - 1; |
||||
|
||||
for (int y = range.start; y < range.end; ++y) |
||||
{ |
||||
const float* srcPrevRow = src[y - 1]; |
||||
const float* srcCurRow = src[y]; |
||||
const float* srcNextRow = src[y + 1]; |
||||
|
||||
float* dxRow = dx[y]; |
||||
float* dyRow = dy[y]; |
||||
|
||||
for (int x = 1; x < last_col; ++x) |
||||
{ |
||||
dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]); |
||||
dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]); |
||||
} |
||||
} |
||||
} |
||||
|
||||
void centeredGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy) |
||||
{ |
||||
CV_DbgAssert( src.rows > 2 && src.cols > 2 ); |
||||
CV_DbgAssert( dx.size() == src.size() ); |
||||
CV_DbgAssert( dy.size() == src.size() ); |
||||
|
||||
const int last_row = src.rows - 1; |
||||
const int last_col = src.cols - 1; |
||||
|
||||
// compute the gradient on the center body of the image
|
||||
{ |
||||
CenteredGradientBody body; |
||||
|
||||
body.src = src; |
||||
body.dx = dx; |
||||
body.dy = dy; |
||||
|
||||
parallel_for_(Range(1, last_row), body); |
||||
} |
||||
|
||||
// compute the gradient on the first and last rows
|
||||
for (int x = 1; x < last_col; ++x) |
||||
{ |
||||
dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1)); |
||||
dy(0, x) = 0.5f * (src(1, x) - src(0, x)); |
||||
|
||||
dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1)); |
||||
dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x)); |
||||
} |
||||
|
||||
// compute the gradient on the first and last columns
|
||||
for (int y = 1; y < last_row; ++y) |
||||
{ |
||||
dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0)); |
||||
dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0)); |
||||
|
||||
dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1)); |
||||
dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col)); |
||||
} |
||||
|
||||
// compute the gradient at the four corners
|
||||
dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0)); |
||||
dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0)); |
||||
|
||||
dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1)); |
||||
dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col)); |
||||
|
||||
dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0)); |
||||
dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0)); |
||||
|
||||
dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1)); |
||||
dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col)); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// forwardGradient
|
||||
|
||||
struct ForwardGradientBody : ParallelLoopBody |
||||
{ |
||||
void operator() (const Range& range) const; |
||||
|
||||
Mat_<float> src; |
||||
mutable Mat_<float> dx; |
||||
mutable Mat_<float> dy; |
||||
}; |
||||
|
||||
void ForwardGradientBody::operator() (const Range& range) const |
||||
{ |
||||
const int last_col = src.cols - 1; |
||||
|
||||
for (int y = range.start; y < range.end; ++y) |
||||
{ |
||||
const float* srcCurRow = src[y]; |
||||
const float* srcNextRow = src[y + 1]; |
||||
|
||||
float* dxRow = dx[y]; |
||||
float* dyRow = dy[y]; |
||||
|
||||
for (int x = 0; x < last_col; ++x) |
||||
{ |
||||
dxRow[x] = srcCurRow[x + 1] - srcCurRow[x]; |
||||
dyRow[x] = srcNextRow[x] - srcCurRow[x]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
void forwardGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy) |
||||
{ |
||||
CV_DbgAssert( src.rows > 2 && src.cols > 2 ); |
||||
CV_DbgAssert( dx.size() == src.size() ); |
||||
CV_DbgAssert( dy.size() == src.size() ); |
||||
|
||||
const int last_row = src.rows - 1; |
||||
const int last_col = src.cols - 1; |
||||
|
||||
// compute the gradient on the central body of the image
|
||||
{ |
||||
ForwardGradientBody body; |
||||
|
||||
body.src = src; |
||||
body.dx = dx; |
||||
body.dy = dy; |
||||
|
||||
parallel_for_(Range(0, last_row), body); |
||||
} |
||||
|
||||
// compute the gradient on the last row
|
||||
for (int x = 0; x < last_col; ++x) |
||||
{ |
||||
dx(last_row, x) = src(last_row, x + 1) - src(last_row, x); |
||||
dy(last_row, x) = 0.0f; |
||||
} |
||||
|
||||
// compute the gradient on the last column
|
||||
for (int y = 0; y < last_row; ++y) |
||||
{ |
||||
dx(y, last_col) = 0.0f; |
||||
dy(y, last_col) = src(y + 1, last_col) - src(y, last_col); |
||||
} |
||||
|
||||
dx(last_row, last_col) = 0.0f; |
||||
dy(last_row, last_col) = 0.0f; |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// divergence
|
||||
|
||||
struct DivergenceBody : ParallelLoopBody |
||||
{ |
||||
void operator() (const Range& range) const; |
||||
|
||||
Mat_<float> v1; |
||||
Mat_<float> v2; |
||||
mutable Mat_<float> div; |
||||
}; |
||||
|
||||
void DivergenceBody::operator() (const Range& range) const |
||||
{ |
||||
for (int y = range.start; y < range.end; ++y) |
||||
{ |
||||
const float* v1Row = v1[y]; |
||||
const float* v2PrevRow = v2[y - 1]; |
||||
const float* v2CurRow = v2[y]; |
||||
|
||||
float* divRow = div[y]; |
||||
|
||||
for(int x = 1; x < v1.cols; ++x) |
||||
{ |
||||
const float v1x = v1Row[x] - v1Row[x - 1]; |
||||
const float v2y = v2CurRow[x] - v2PrevRow[x]; |
||||
|
||||
divRow[x] = v1x + v2y; |
||||
} |
||||
} |
||||
} |
||||
|
||||
void divergence(const Mat_<float>& v1, const Mat_<float>& v2, Mat_<float>& div) |
||||
{ |
||||
CV_DbgAssert( v1.rows > 2 && v1.cols > 2 ); |
||||
CV_DbgAssert( v2.size() == v1.size() ); |
||||
CV_DbgAssert( div.size() == v1.size() ); |
||||
|
||||
{ |
||||
DivergenceBody body; |
||||
|
||||
body.v1 = v1; |
||||
body.v2 = v2; |
||||
body.div = div; |
||||
|
||||
parallel_for_(Range(1, v1.rows), body); |
||||
} |
||||
|
||||
// compute the divergence on the first row
|
||||
for(int x = 1; x < v1.cols; ++x) |
||||
div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x); |
||||
|
||||
// compute the divergence on the first column
|
||||
for (int y = 1; y < v1.rows; ++y) |
||||
div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0); |
||||
|
||||
div(0, 0) = v1(0, 0) + v2(0, 0); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// calcGradRho
|
||||
|
||||
struct CalcGradRhoBody : ParallelLoopBody |
||||
{ |
||||
void operator() (const Range& range) const; |
||||
|
||||
Mat_<float> I0; |
||||
Mat_<float> I1w; |
||||
Mat_<float> I1wx; |
||||
Mat_<float> I1wy; |
||||
Mat_<float> u1; |
||||
Mat_<float> u2; |
||||
mutable Mat_<float> grad; |
||||
mutable Mat_<float> rho_c; |
||||
}; |
||||
|
||||
void CalcGradRhoBody::operator() (const Range& range) const |
||||
{ |
||||
for (int y = range.start; y < range.end; ++y) |
||||
{ |
||||
const float* I0Row = I0[y]; |
||||
const float* I1wRow = I1w[y]; |
||||
const float* I1wxRow = I1wx[y]; |
||||
const float* I1wyRow = I1wy[y]; |
||||
const float* u1Row = u1[y]; |
||||
const float* u2Row = u2[y]; |
||||
|
||||
float* gradRow = grad[y]; |
||||
float* rhoRow = rho_c[y]; |
||||
|
||||
for (int x = 0; x < I0.cols; ++x) |
||||
{ |
||||
const float Ix2 = I1wxRow[x] * I1wxRow[x]; |
||||
const float Iy2 = I1wyRow[x] * I1wyRow[x]; |
||||
|
||||
// store the |Grad(I1)|^2
|
||||
gradRow[x] = Ix2 + Iy2; |
||||
|
||||
// compute the constant part of the rho function
|
||||
rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]); |
||||
} |
||||
} |
||||
} |
||||
|
||||
void calcGradRho(const Mat_<float>& I0, const Mat_<float>& I1w, const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, |
||||
Mat_<float>& grad, Mat_<float>& rho_c) |
||||
{ |
||||
CV_DbgAssert( I1w.size() == I0.size() ); |
||||
CV_DbgAssert( I1wx.size() == I0.size() ); |
||||
CV_DbgAssert( I1wy.size() == I0.size() ); |
||||
CV_DbgAssert( u1.size() == I0.size() ); |
||||
CV_DbgAssert( u2.size() == I0.size() ); |
||||
CV_DbgAssert( grad.size() == I0.size() ); |
||||
CV_DbgAssert( rho_c.size() == I0.size() ); |
||||
|
||||
CalcGradRhoBody body; |
||||
|
||||
body.I0 = I0; |
||||
body.I1w = I1w; |
||||
body.I1wx = I1wx; |
||||
body.I1wy = I1wy; |
||||
body.u1 = u1; |
||||
body.u2 = u2; |
||||
body.grad = grad; |
||||
body.rho_c = rho_c; |
||||
|
||||
parallel_for_(Range(0, I0.rows), body); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// estimateV
|
||||
|
||||
struct EstimateVBody : ParallelLoopBody |
||||
{ |
||||
void operator() (const Range& range) const; |
||||
|
||||
Mat_<float> I1wx; |
||||
Mat_<float> I1wy; |
||||
Mat_<float> u1; |
||||
Mat_<float> u2; |
||||
Mat_<float> grad; |
||||
Mat_<float> rho_c; |
||||
mutable Mat_<float> v1; |
||||
mutable Mat_<float> v2; |
||||
float l_t; |
||||
}; |
||||
|
||||
void EstimateVBody::operator() (const Range& range) const |
||||
{ |
||||
for (int y = range.start; y < range.end; ++y) |
||||
{ |
||||
const float* I1wxRow = I1wx[y]; |
||||
const float* I1wyRow = I1wy[y]; |
||||
const float* u1Row = u1[y]; |
||||
const float* u2Row = u2[y]; |
||||
const float* gradRow = grad[y]; |
||||
const float* rhoRow = rho_c[y]; |
||||
|
||||
float* v1Row = v1[y]; |
||||
float* v2Row = v2[y]; |
||||
|
||||
for (int x = 0; x < I1wx.cols; ++x) |
||||
{ |
||||
const float rho = rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]); |
||||
|
||||
float d1 = 0.0f; |
||||
float d2 = 0.0f; |
||||
|
||||
if (rho < -l_t * gradRow[x]) |
||||
{ |
||||
d1 = l_t * I1wxRow[x]; |
||||
d2 = l_t * I1wyRow[x]; |
||||
} |
||||
else if (rho > l_t * gradRow[x]) |
||||
{ |
||||
d1 = -l_t * I1wxRow[x]; |
||||
d2 = -l_t * I1wyRow[x]; |
||||
} |
||||
else if (gradRow[x] > numeric_limits<float>::epsilon()) |
||||
{ |
||||
float fi = -rho / gradRow[x]; |
||||
d1 = fi * I1wxRow[x]; |
||||
d2 = fi * I1wyRow[x]; |
||||
} |
||||
|
||||
v1Row[x] = u1Row[x] + d1; |
||||
v2Row[x] = u2Row[x] + d2; |
||||
} |
||||
} |
||||
} |
||||
|
||||
void estimateV(const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, const Mat_<float>& grad, const Mat_<float>& rho_c, |
||||
Mat_<float>& v1, Mat_<float>& v2, float l_t) |
||||
{ |
||||
CV_DbgAssert( I1wy.size() == I1wx.size() ); |
||||
CV_DbgAssert( u1.size() == I1wx.size() ); |
||||
CV_DbgAssert( u2.size() == I1wx.size() ); |
||||
CV_DbgAssert( grad.size() == I1wx.size() ); |
||||
CV_DbgAssert( rho_c.size() == I1wx.size() ); |
||||
CV_DbgAssert( v1.size() == I1wx.size() ); |
||||
CV_DbgAssert( v2.size() == I1wx.size() ); |
||||
|
||||
EstimateVBody body; |
||||
|
||||
body.I1wx = I1wx; |
||||
body.I1wy = I1wy; |
||||
body.u1 = u1; |
||||
body.u2 = u2; |
||||
body.grad = grad; |
||||
body.rho_c = rho_c; |
||||
body.v1 = v1; |
||||
body.v2 = v2; |
||||
body.l_t = l_t; |
||||
|
||||
parallel_for_(Range(0, I1wx.rows), body); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// estimateU
|
||||
|
||||
float estimateU(const Mat_<float>& v1, const Mat_<float>& v2, const Mat_<float>& div_p1, const Mat_<float>& div_p2, Mat_<float>& u1, Mat_<float>& u2, float theta) |
||||
{ |
||||
CV_DbgAssert( v2.size() == v1.size() ); |
||||
CV_DbgAssert( div_p1.size() == v1.size() ); |
||||
CV_DbgAssert( div_p2.size() == v1.size() ); |
||||
CV_DbgAssert( u1.size() == v1.size() ); |
||||
CV_DbgAssert( u2.size() == v1.size() ); |
||||
|
||||
float error = 0.0f; |
||||
for (int y = 0; y < v1.rows; ++y) |
||||
{ |
||||
const float* v1Row = v1[y]; |
||||
const float* v2Row = v2[y]; |
||||
const float* divP1Row = div_p1[y]; |
||||
const float* divP2Row = div_p2[y]; |
||||
|
||||
float* u1Row = u1[y]; |
||||
float* u2Row = u2[y]; |
||||
|
||||
for (int x = 0; x < v1.cols; ++x) |
||||
{ |
||||
const float u1k = u1Row[x]; |
||||
const float u2k = u2Row[x]; |
||||
|
||||
u1Row[x] = v1Row[x] + theta * divP1Row[x]; |
||||
u2Row[x] = v2Row[x] + theta * divP2Row[x]; |
||||
|
||||
error += (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k); |
||||
} |
||||
} |
||||
|
||||
return error; |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// estimateDualVariables
|
||||
|
||||
struct EstimateDualVariablesBody : ParallelLoopBody |
||||
{ |
||||
void operator() (const Range& range) const; |
||||
|
||||
Mat_<float> u1x; |
||||
Mat_<float> u1y; |
||||
Mat_<float> u2x; |
||||
Mat_<float> u2y; |
||||
mutable Mat_<float> p11; |
||||
mutable Mat_<float> p12; |
||||
mutable Mat_<float> p21; |
||||
mutable Mat_<float> p22; |
||||
float taut; |
||||
}; |
||||
|
||||
void EstimateDualVariablesBody::operator() (const Range& range) const |
||||
{ |
||||
for (int y = range.start; y < range.end; ++y) |
||||
{ |
||||
const float* u1xRow = u1x[y]; |
||||
const float* u1yRow = u1y[y]; |
||||
const float* u2xRow = u2x[y]; |
||||
const float* u2yRow = u2y[y]; |
||||
|
||||
float* p11Row = p11[y]; |
||||
float* p12Row = p12[y]; |
||||
float* p21Row = p21[y]; |
||||
float* p22Row = p22[y]; |
||||
|
||||
for (int x = 0; x < u1x.cols; ++x) |
||||
{ |
||||
const float g1 = static_cast<float>(hypot(u1xRow[x], u1yRow[x])); |
||||
const float g2 = static_cast<float>(hypot(u2xRow[x], u2yRow[x])); |
||||
|
||||
const float ng1 = 1.0f + taut * g1; |
||||
const float ng2 = 1.0f + taut * g2; |
||||
|
||||
p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1; |
||||
p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1; |
||||
p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2; |
||||
p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2; |
||||
} |
||||
} |
||||
} |
||||
|
||||
void estimateDualVariables(const Mat_<float>& u1x, const Mat_<float>& u1y, const Mat_<float>& u2x, const Mat_<float>& u2y, |
||||
Mat_<float>& p11, Mat_<float>& p12, Mat_<float>& p21, Mat_<float>& p22, float taut) |
||||
{ |
||||
CV_DbgAssert( u1y.size() == u1x.size() ); |
||||
CV_DbgAssert( u2x.size() == u1x.size() ); |
||||
CV_DbgAssert( u2y.size() == u1x.size() ); |
||||
CV_DbgAssert( p11.size() == u1x.size() ); |
||||
CV_DbgAssert( p12.size() == u1x.size() ); |
||||
CV_DbgAssert( p21.size() == u1x.size() ); |
||||
CV_DbgAssert( p22.size() == u1x.size() ); |
||||
|
||||
EstimateDualVariablesBody body; |
||||
|
||||
body.u1x = u1x; |
||||
body.u1y = u1y; |
||||
body.u2x = u2x; |
||||
body.u2y = u2y; |
||||
body.p11 = p11; |
||||
body.p12 = p12; |
||||
body.p21 = p21; |
||||
body.p22 = p22; |
||||
body.taut = taut; |
||||
|
||||
parallel_for_(Range(0, u1x.rows), body); |
||||
} |
||||
|
||||
void OpticalFlowDual_TVL1::procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2) |
||||
{ |
||||
const float scaledEpsilon = static_cast<float>(epsilon * epsilon * I0.size().area()); |
||||
|
||||
CV_DbgAssert( I1.size() == I0.size() ); |
||||
CV_DbgAssert( I1.type() == I0.type() ); |
||||
CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); |
||||
CV_DbgAssert( u2.size() == u1.size() ); |
||||
|
||||
if (u1.empty()) |
||||
{ |
||||
u1.create(I0.size()); |
||||
u1.setTo(Scalar::all(0)); |
||||
|
||||
u2.create(I0.size()); |
||||
u2.setTo(Scalar::all(0)); |
||||
} |
||||
|
||||
Mat_<float> I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
centeredGradient(I1, I1x, I1y); |
||||
|
||||
Mat_<float> flowMap1 = flowMap1_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> flowMap2 = flowMap2_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
Mat_<float> I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
Mat_<float> grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
Mat_<float> v1 = v1_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> v2 = v2_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
Mat_<float> p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
p11.setTo(Scalar::all(0)); |
||||
p12.setTo(Scalar::all(0)); |
||||
p21.setTo(Scalar::all(0)); |
||||
p22.setTo(Scalar::all(0)); |
||||
|
||||
Mat_<float> div_p1 = div_p1_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> div_p2 = div_p2_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
Mat_<float> u1x = u1x_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> u1y = u1y_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> u2x = u2x_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
Mat_<float> u2y = u2y_buf(Rect(0, 0, I0.cols, I0.rows)); |
||||
|
||||
const float l_t = static_cast<float>(lambda * theta); |
||||
const float taut = static_cast<float>(tau / theta); |
||||
|
||||
for (int warpings = 0; warpings < warps; ++warpings) |
||||
{ |
||||
// compute the warping of the target image and its derivatives
|
||||
buildFlowMap(u1, u2, flowMap1, flowMap2); |
||||
remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC); |
||||
remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC); |
||||
remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC); |
||||
|
||||
calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c); |
||||
|
||||
float error = numeric_limits<float>::max(); |
||||
for (int n = 0; error > scaledEpsilon && n < iterations; ++n) |
||||
{ |
||||
// estimate the values of the variable (v1, v2) (thresholding operator TH)
|
||||
estimateV(I1wx, I1wy, u1, u2, grad, rho_c, v1, v2, l_t); |
||||
|
||||
// compute the divergence of the dual variable (p1, p2)
|
||||
divergence(p11, p12, div_p1); |
||||
divergence(p21, p22, div_p2); |
||||
|
||||
// estimate the values of the optical flow (u1, u2)
|
||||
error = estimateU(v1, v2, div_p1, div_p2, u1, u2, static_cast<float>(theta)); |
||||
|
||||
// compute the gradient of the optical flow (Du1, Du2)
|
||||
forwardGradient(u1, u1x, u1y); |
||||
forwardGradient(u2, u2x, u2y); |
||||
|
||||
// estimate the values of the dual variable (p1, p2)
|
||||
estimateDualVariables(u1x, u1y, u2x, u2y, p11, p12, p21, p22, taut); |
||||
} |
||||
} |
||||
} |
||||
|
||||
void OpticalFlowDual_TVL1::collectGarbage() |
||||
{ |
||||
I0s.clear(); |
||||
I1s.clear(); |
||||
u1s.clear(); |
||||
u2s.clear(); |
||||
|
||||
I1x_buf.release(); |
||||
I1y_buf.release(); |
||||
|
||||
flowMap1_buf.release(); |
||||
flowMap2_buf.release(); |
||||
|
||||
I1w_buf.release(); |
||||
I1wx_buf.release(); |
||||
I1wy_buf.release(); |
||||
|
||||
grad_buf.release(); |
||||
rho_c_buf.release(); |
||||
|
||||
v1_buf.release(); |
||||
v2_buf.release(); |
||||
|
||||
p11_buf.release(); |
||||
p12_buf.release(); |
||||
p21_buf.release(); |
||||
p22_buf.release(); |
||||
|
||||
div_p1_buf.release(); |
||||
div_p2_buf.release(); |
||||
|
||||
u1x_buf.release(); |
||||
u1y_buf.release(); |
||||
u2x_buf.release(); |
||||
u2y_buf.release(); |
||||
} |
||||
|
||||
CV_INIT_ALGORITHM(OpticalFlowDual_TVL1, "DenseOpticalFlow.DualTVL1", |
||||
obj.info()->addParam(obj, "tau", obj.tau, false, 0, 0, |
||||
"Time step of the numerical scheme"); |
||||
obj.info()->addParam(obj, "lambda", obj.lambda, false, 0, 0, |
||||
"Weight parameter for the data term, attachment parameter"); |
||||
obj.info()->addParam(obj, "theta", obj.theta, false, 0, 0, |
||||
"Weight parameter for (u - v)^2, tightness parameter"); |
||||
obj.info()->addParam(obj, "nscales", obj.nscales, false, 0, 0, |
||||
"Number of scales used to create the pyramid of images"); |
||||
obj.info()->addParam(obj, "warps", obj.warps, false, 0, 0, |
||||
"Number of warpings per scale"); |
||||
obj.info()->addParam(obj, "epsilon", obj.epsilon, false, 0, 0, |
||||
"Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time"); |
||||
obj.info()->addParam(obj, "iterations", obj.iterations, false, 0, 0, |
||||
"Stopping criterion iterations number used in the numerical scheme"); |
||||
obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow)); |
||||
|
||||
} // namespace
|
||||
|
||||
Ptr<DenseOpticalFlow> cv::createOptFlow_DualTVL1() |
||||
{ |
||||
return new OpticalFlowDual_TVL1; |
||||
} |
@ -0,0 +1,171 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
#include <fstream> |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cvtest; |
||||
|
||||
//#define DUMP
|
||||
|
||||
namespace |
||||
{ |
||||
// first four bytes, should be the same in little endian
|
||||
const float FLO_TAG_FLOAT = 202021.25f; // check for this when READING the file
|
||||
const char FLO_TAG_STRING[] = "PIEH"; // use this when WRITING the file
|
||||
|
||||
// binary file format for flow data specified here:
|
||||
// http://vision.middlebury.edu/flow/data/
|
||||
void writeOpticalFlowToFile(const Mat_<Point2f>& flow, const string& fileName) |
||||
{ |
||||
ofstream file(fileName.c_str(), ios_base::binary); |
||||
|
||||
file << FLO_TAG_STRING; |
||||
|
||||
file.write((const char*) &flow.cols, sizeof(int)); |
||||
file.write((const char*) &flow.rows, sizeof(int)); |
||||
|
||||
for (int i = 0; i < flow.rows; ++i) |
||||
{ |
||||
for (int j = 0; j < flow.cols; ++j) |
||||
{ |
||||
const Point2f u = flow(i, j); |
||||
|
||||
file.write((const char*) &u.x, sizeof(float)); |
||||
file.write((const char*) &u.y, sizeof(float)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
// binary file format for flow data specified here:
|
||||
// http://vision.middlebury.edu/flow/data/
|
||||
void readOpticalFlowFromFile(Mat_<Point2f>& flow, const string& fileName) |
||||
{ |
||||
ifstream file(fileName.c_str(), ios_base::binary); |
||||
|
||||
float tag; |
||||
file.read((char*) &tag, sizeof(float)); |
||||
CV_Assert( tag == FLO_TAG_FLOAT ); |
||||
|
||||
Size size; |
||||
|
||||
file.read((char*) &size.width, sizeof(int)); |
||||
file.read((char*) &size.height, sizeof(int)); |
||||
|
||||
flow.create(size); |
||||
|
||||
for (int i = 0; i < flow.rows; ++i) |
||||
{ |
||||
for (int j = 0; j < flow.cols; ++j) |
||||
{ |
||||
Point2f u; |
||||
|
||||
file.read((char*) &u.x, sizeof(float)); |
||||
file.read((char*) &u.y, sizeof(float)); |
||||
|
||||
flow(i, j) = u; |
||||
} |
||||
} |
||||
} |
||||
|
||||
bool isFlowCorrect(Point2f u) |
||||
{ |
||||
return !cvIsNaN(u.x) && !cvIsNaN(u.y) && (fabs(u.x) < 1e9) && (fabs(u.y) < 1e9); |
||||
} |
||||
|
||||
double calcRMSE(const Mat_<Point2f>& flow1, const Mat_<Point2f>& flow2) |
||||
{ |
||||
double sum = 0.0; |
||||
int counter = 0; |
||||
|
||||
for (int i = 0; i < flow1.rows; ++i) |
||||
{ |
||||
for (int j = 0; j < flow1.cols; ++j) |
||||
{ |
||||
const Point2f u1 = flow1(i, j); |
||||
const Point2f u2 = flow2(i, j); |
||||
|
||||
if (isFlowCorrect(u1) && isFlowCorrect(u2)) |
||||
{ |
||||
const Point2f diff = u1 - u2; |
||||
sum += diff.ddot(diff); |
||||
++counter; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return sqrt(sum / (1e-9 + counter)); |
||||
} |
||||
} |
||||
|
||||
TEST(Video_calcOpticalFlowDual_TVL1, Regression) |
||||
{ |
||||
const double MAX_RMSE = 0.02; |
||||
|
||||
const string frame1_path = TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"; |
||||
const string frame2_path = TS::ptr()->get_data_path() + "optflow/RubberWhale2.png"; |
||||
const string gold_flow_path = TS::ptr()->get_data_path() + "optflow/tvl1_flow.flo"; |
||||
|
||||
Mat frame1 = imread(frame1_path, IMREAD_GRAYSCALE); |
||||
Mat frame2 = imread(frame2_path, IMREAD_GRAYSCALE); |
||||
ASSERT_FALSE(frame1.empty()); |
||||
ASSERT_FALSE(frame2.empty()); |
||||
|
||||
Mat_<Point2f> flow; |
||||
Ptr<DenseOpticalFlow> tvl1 = createOptFlow_DualTVL1(); |
||||
|
||||
tvl1->calc(frame1, frame2, flow); |
||||
|
||||
#ifdef DUMP |
||||
writeOpticalFlowToFile(flow, gold_flow_path); |
||||
#else |
||||
Mat_<Point2f> gold; |
||||
readOpticalFlowFromFile(gold, gold_flow_path); |
||||
|
||||
ASSERT_EQ(gold.rows, flow.rows); |
||||
ASSERT_EQ(gold.cols, flow.cols); |
||||
|
||||
const double err = calcRMSE(gold, flow); |
||||
EXPECT_LE(err, MAX_RMSE); |
||||
#endif |
||||
} |
@ -0,0 +1,193 @@ |
||||
#include <iostream> |
||||
#include <fstream> |
||||
|
||||
#include "opencv2/video/tracking.hpp" |
||||
#include "opencv2/highgui/highgui.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace std; |
||||
|
||||
inline bool isFlowCorrect(Point2f u) |
||||
{ |
||||
return !cvIsNaN(u.x) && !cvIsNaN(u.y) && fabs(u.x) < 1e9 && fabs(u.y) < 1e9; |
||||
} |
||||
|
||||
static Vec3b computeColor(float fx, float fy) |
||||
{ |
||||
static bool first = true; |
||||
|
||||
// relative lengths of color transitions:
|
||||
// these are chosen based on perceptual similarity
|
||||
// (e.g. one can distinguish more shades between red and yellow
|
||||
// than between yellow and green)
|
||||
const int RY = 15; |
||||
const int YG = 6; |
||||
const int GC = 4; |
||||
const int CB = 11; |
||||
const int BM = 13; |
||||
const int MR = 6; |
||||
const int NCOLS = RY + YG + GC + CB + BM + MR; |
||||
static Vec3i colorWheel[NCOLS]; |
||||
|
||||
if (first) |
||||
{ |
||||
int k = 0; |
||||
|
||||
for (int i = 0; i < RY; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255, 255 * i / RY, 0); |
||||
|
||||
for (int i = 0; i < YG; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255 - 255 * i / YG, 255, 0); |
||||
|
||||
for (int i = 0; i < GC; ++i, ++k) |
||||
colorWheel[k] = Vec3i(0, 255, 255 * i / GC); |
||||
|
||||
for (int i = 0; i < CB; ++i, ++k) |
||||
colorWheel[k] = Vec3i(0, 255 - 255 * i / CB, 255); |
||||
|
||||
for (int i = 0; i < BM; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255 * i / BM, 0, 255); |
||||
|
||||
for (int i = 0; i < MR; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255, 0, 255 - 255 * i / MR); |
||||
|
||||
first = false; |
||||
} |
||||
|
||||
const float rad = sqrt(fx * fx + fy * fy); |
||||
const float a = atan2(-fy, -fx) / (float)CV_PI; |
||||
|
||||
const float fk = (a + 1.0f) / 2.0f * (NCOLS - 1); |
||||
const int k0 = static_cast<int>(fk); |
||||
const int k1 = (k0 + 1) % NCOLS; |
||||
const float f = fk - k0; |
||||
|
||||
Vec3b pix; |
||||
|
||||
for (int b = 0; b < 3; b++) |
||||
{ |
||||
const float col0 = colorWheel[k0][b] / 255.f; |
||||
const float col1 = colorWheel[k1][b] / 255.f; |
||||
|
||||
float col = (1 - f) * col0 + f * col1; |
||||
|
||||
if (rad <= 1) |
||||
col = 1 - rad * (1 - col); // increase saturation with radius
|
||||
else |
||||
col *= .75; // out of range
|
||||
|
||||
pix[2 - b] = static_cast<uchar>(255.f * col); |
||||
} |
||||
|
||||
return pix; |
||||
} |
||||
|
||||
static void drawOpticalFlow(const Mat_<Point2f>& flow, Mat& dst, float maxmotion = -1) |
||||
{ |
||||
dst.create(flow.size(), CV_8UC3); |
||||
dst.setTo(Scalar::all(0)); |
||||
|
||||
// determine motion range:
|
||||
float maxrad = maxmotion; |
||||
|
||||
if (maxmotion <= 0) |
||||
{ |
||||
maxrad = 1; |
||||
for (int y = 0; y < flow.rows; ++y) |
||||
{ |
||||
for (int x = 0; x < flow.cols; ++x) |
||||
{ |
||||
Point2f u = flow(y, x); |
||||
|
||||
if (!isFlowCorrect(u)) |
||||
continue; |
||||
|
||||
maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
for (int y = 0; y < flow.rows; ++y) |
||||
{ |
||||
for (int x = 0; x < flow.cols; ++x) |
||||
{ |
||||
Point2f u = flow(y, x); |
||||
|
||||
if (isFlowCorrect(u)) |
||||
dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad); |
||||
} |
||||
} |
||||
} |
||||
|
||||
// binary file format for flow data specified here:
|
||||
// http://vision.middlebury.edu/flow/data/
|
||||
static void writeOpticalFlowToFile(const Mat_<Point2f>& flow, const string& fileName) |
||||
{ |
||||
static const char FLO_TAG_STRING[] = "PIEH"; |
||||
|
||||
ofstream file(fileName.c_str(), ios_base::binary); |
||||
|
||||
file << FLO_TAG_STRING; |
||||
|
||||
file.write((const char*) &flow.cols, sizeof(int)); |
||||
file.write((const char*) &flow.rows, sizeof(int)); |
||||
|
||||
for (int i = 0; i < flow.rows; ++i) |
||||
{ |
||||
for (int j = 0; j < flow.cols; ++j) |
||||
{ |
||||
const Point2f u = flow(i, j); |
||||
|
||||
file.write((const char*) &u.x, sizeof(float)); |
||||
file.write((const char*) &u.y, sizeof(float)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
int main(int argc, const char* argv[]) |
||||
{ |
||||
if (argc < 3) |
||||
{ |
||||
cerr << "Usage : " << argv[0] << "<frame0> <frame1> [<output_flow>]" << endl; |
||||
return -1; |
||||
} |
||||
|
||||
Mat frame0 = imread(argv[1], IMREAD_GRAYSCALE); |
||||
Mat frame1 = imread(argv[2], IMREAD_GRAYSCALE); |
||||
|
||||
if (frame0.empty()) |
||||
{ |
||||
cerr << "Can't open image [" << argv[1] << "]" << endl; |
||||
return -1; |
||||
} |
||||
if (frame1.empty()) |
||||
{ |
||||
cerr << "Can't open image [" << argv[2] << "]" << endl; |
||||
return -1; |
||||
} |
||||
|
||||
if (frame1.size() != frame0.size()) |
||||
{ |
||||
cerr << "Images should be of equal sizes" << endl; |
||||
return -1; |
||||
} |
||||
|
||||
Mat_<Point2f> flow; |
||||
Ptr<DenseOpticalFlow> tvl1 = createOptFlow_DualTVL1(); |
||||
|
||||
const double start = (double)getTickCount(); |
||||
tvl1->calc(frame0, frame1, flow); |
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "calcOpticalFlowDual_TVL1 : " << timeSec << " sec" << endl; |
||||
|
||||
Mat out; |
||||
drawOpticalFlow(flow, out); |
||||
|
||||
if (argc == 4) |
||||
writeOpticalFlowToFile(flow, argv[3]); |
||||
|
||||
imshow("Flow", out); |
||||
waitKey(); |
||||
|
||||
return 0; |
||||
} |
@ -0,0 +1,89 @@ |
||||
#include <cmath> |
||||
#include <iostream> |
||||
|
||||
#include "opencv2/core/core.hpp" |
||||
#include "opencv2/highgui/highgui.hpp" |
||||
#include "opencv2/imgproc/imgproc.hpp" |
||||
#include "opencv2/gpu/gpu.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
static void help() |
||||
{ |
||||
cout << "This program demonstrates line finding with the Hough transform." << endl; |
||||
cout << "Usage:" << endl; |
||||
cout << "./gpu-example-houghlines <image_name>, Default is pic1.png\n" << endl; |
||||
} |
||||
|
||||
int main(int argc, const char* argv[]) |
||||
{ |
||||
const string filename = argc >= 2 ? argv[1] : "pic1.png"; |
||||
|
||||
Mat src = imread(filename, IMREAD_GRAYSCALE); |
||||
if (src.empty()) |
||||
{ |
||||
help(); |
||||
cout << "can not open " << filename << endl; |
||||
return -1; |
||||
} |
||||
|
||||
Mat mask; |
||||
Canny(src, mask, 100, 200, 3); |
||||
|
||||
Mat dst_cpu; |
||||
cvtColor(mask, dst_cpu, CV_GRAY2BGR); |
||||
Mat dst_gpu = dst_cpu.clone(); |
||||
|
||||
vector<Vec4i> lines_cpu; |
||||
{ |
||||
const int64 start = getTickCount(); |
||||
|
||||
HoughLinesP(mask, lines_cpu, 1, CV_PI / 180, 50, 60, 5); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "CPU Time : " << timeSec * 1000 << " ms" << endl; |
||||
cout << "CPU Found : " << lines_cpu.size() << endl; |
||||
} |
||||
|
||||
for (size_t i = 0; i < lines_cpu.size(); ++i) |
||||
{ |
||||
Vec4i l = lines_cpu[i]; |
||||
line(dst_cpu, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0, 0, 255), 3, CV_AA); |
||||
} |
||||
|
||||
GpuMat d_src(mask); |
||||
GpuMat d_lines; |
||||
HoughLinesBuf d_buf; |
||||
{ |
||||
const int64 start = getTickCount(); |
||||
|
||||
gpu::HoughLinesP(d_src, d_lines, d_buf, 1.0f, (float) (CV_PI / 180.0f), 50, 5); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "GPU Time : " << timeSec * 1000 << " ms" << endl; |
||||
cout << "GPU Found : " << d_lines.cols << endl; |
||||
} |
||||
vector<Vec4i> lines_gpu; |
||||
if (!d_lines.empty()) |
||||
{ |
||||
lines_gpu.resize(d_lines.cols); |
||||
Mat h_lines(1, d_lines.cols, CV_32SC4, &lines_gpu[0]); |
||||
d_lines.download(h_lines); |
||||
} |
||||
|
||||
for (size_t i = 0; i < lines_gpu.size(); ++i) |
||||
{ |
||||
Vec4i l = lines_gpu[i]; |
||||
line(dst_gpu, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0, 0, 255), 3, CV_AA); |
||||
} |
||||
|
||||
imshow("source", src); |
||||
imshow("detected lines [CPU]", dst_cpu); |
||||
imshow("detected lines [GPU]", dst_gpu); |
||||
waitKey(); |
||||
|
||||
return 0; |
||||
} |
||||
|
@ -0,0 +1,253 @@ |
||||
#include <iostream> |
||||
#include <fstream> |
||||
|
||||
#include "opencv2/core/core.hpp" |
||||
#include "opencv2/highgui/highgui.hpp" |
||||
#include "opencv2/gpu/gpu.hpp" |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
|
||||
inline bool isFlowCorrect(Point2f u) |
||||
{ |
||||
return !cvIsNaN(u.x) && !cvIsNaN(u.y) && fabs(u.x) < 1e9 && fabs(u.y) < 1e9; |
||||
} |
||||
|
||||
static Vec3b computeColor(float fx, float fy) |
||||
{ |
||||
static bool first = true; |
||||
|
||||
// relative lengths of color transitions:
|
||||
// these are chosen based on perceptual similarity
|
||||
// (e.g. one can distinguish more shades between red and yellow
|
||||
// than between yellow and green)
|
||||
const int RY = 15; |
||||
const int YG = 6; |
||||
const int GC = 4; |
||||
const int CB = 11; |
||||
const int BM = 13; |
||||
const int MR = 6; |
||||
const int NCOLS = RY + YG + GC + CB + BM + MR; |
||||
static Vec3i colorWheel[NCOLS]; |
||||
|
||||
if (first) |
||||
{ |
||||
int k = 0; |
||||
|
||||
for (int i = 0; i < RY; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255, 255 * i / RY, 0); |
||||
|
||||
for (int i = 0; i < YG; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255 - 255 * i / YG, 255, 0); |
||||
|
||||
for (int i = 0; i < GC; ++i, ++k) |
||||
colorWheel[k] = Vec3i(0, 255, 255 * i / GC); |
||||
|
||||
for (int i = 0; i < CB; ++i, ++k) |
||||
colorWheel[k] = Vec3i(0, 255 - 255 * i / CB, 255); |
||||
|
||||
for (int i = 0; i < BM; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255 * i / BM, 0, 255); |
||||
|
||||
for (int i = 0; i < MR; ++i, ++k) |
||||
colorWheel[k] = Vec3i(255, 0, 255 - 255 * i / MR); |
||||
|
||||
first = false; |
||||
} |
||||
|
||||
const float rad = sqrt(fx * fx + fy * fy); |
||||
const float a = atan2(-fy, -fx) / (float) CV_PI; |
||||
|
||||
const float fk = (a + 1.0f) / 2.0f * (NCOLS - 1); |
||||
const int k0 = static_cast<int>(fk); |
||||
const int k1 = (k0 + 1) % NCOLS; |
||||
const float f = fk - k0; |
||||
|
||||
Vec3b pix; |
||||
|
||||
for (int b = 0; b < 3; b++) |
||||
{ |
||||
const float col0 = colorWheel[k0][b] / 255.0f; |
||||
const float col1 = colorWheel[k1][b] / 255.0f; |
||||
|
||||
float col = (1 - f) * col0 + f * col1; |
||||
|
||||
if (rad <= 1) |
||||
col = 1 - rad * (1 - col); // increase saturation with radius
|
||||
else |
||||
col *= .75; // out of range
|
||||
|
||||
pix[2 - b] = static_cast<uchar>(255.0 * col); |
||||
} |
||||
|
||||
return pix; |
||||
} |
||||
|
||||
static void drawOpticalFlow(const Mat_<float>& flowx, const Mat_<float>& flowy, Mat& dst, float maxmotion = -1) |
||||
{ |
||||
dst.create(flowx.size(), CV_8UC3); |
||||
dst.setTo(Scalar::all(0)); |
||||
|
||||
// determine motion range:
|
||||
float maxrad = maxmotion; |
||||
|
||||
if (maxmotion <= 0) |
||||
{ |
||||
maxrad = 1; |
||||
for (int y = 0; y < flowx.rows; ++y) |
||||
{ |
||||
for (int x = 0; x < flowx.cols; ++x) |
||||
{ |
||||
Point2f u(flowx(y, x), flowy(y, x)); |
||||
|
||||
if (!isFlowCorrect(u)) |
||||
continue; |
||||
|
||||
maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
for (int y = 0; y < flowx.rows; ++y) |
||||
{ |
||||
for (int x = 0; x < flowx.cols; ++x) |
||||
{ |
||||
Point2f u(flowx(y, x), flowy(y, x)); |
||||
|
||||
if (isFlowCorrect(u)) |
||||
dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad); |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void showFlow(const char* name, const GpuMat& d_flowx, const GpuMat& d_flowy) |
||||
{ |
||||
Mat flowx(d_flowx); |
||||
Mat flowy(d_flowy); |
||||
|
||||
Mat out; |
||||
drawOpticalFlow(flowx, flowy, out, 10); |
||||
|
||||
imshow(name, out); |
||||
} |
||||
|
||||
int main(int argc, const char* argv[]) |
||||
{ |
||||
if (argc < 3) |
||||
{ |
||||
cerr << "Usage : " << argv[0] << "<frame0> <frame1>" << endl; |
||||
return -1; |
||||
} |
||||
|
||||
Mat frame0 = imread(argv[1], IMREAD_GRAYSCALE); |
||||
Mat frame1 = imread(argv[2], IMREAD_GRAYSCALE); |
||||
|
||||
if (frame0.empty()) |
||||
{ |
||||
cerr << "Can't open image [" << argv[1] << "]" << endl; |
||||
return -1; |
||||
} |
||||
if (frame1.empty()) |
||||
{ |
||||
cerr << "Can't open image [" << argv[2] << "]" << endl; |
||||
return -1; |
||||
} |
||||
|
||||
if (frame1.size() != frame0.size()) |
||||
{ |
||||
cerr << "Images should be of equal sizes" << endl; |
||||
return -1; |
||||
} |
||||
|
||||
GpuMat d_frame0(frame0); |
||||
GpuMat d_frame1(frame1); |
||||
|
||||
GpuMat d_flowx(frame0.size(), CV_32FC1); |
||||
GpuMat d_flowy(frame0.size(), CV_32FC1); |
||||
|
||||
BroxOpticalFlow brox(0.197f, 50.0f, 0.8f, 10, 77, 10); |
||||
PyrLKOpticalFlow lk; lk.winSize = Size(7, 7); |
||||
FarnebackOpticalFlow farn; |
||||
OpticalFlowDual_TVL1_GPU tvl1; |
||||
FastOpticalFlowBM fastBM; |
||||
|
||||
{ |
||||
GpuMat d_frame0f; |
||||
GpuMat d_frame1f; |
||||
|
||||
d_frame0.convertTo(d_frame0f, CV_32F, 1.0 / 255.0); |
||||
d_frame1.convertTo(d_frame1f, CV_32F, 1.0 / 255.0); |
||||
|
||||
const int64 start = getTickCount(); |
||||
|
||||
brox(d_frame0f, d_frame1f, d_flowx, d_flowy); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "Brox : " << timeSec << " sec" << endl; |
||||
|
||||
showFlow("Brox", d_flowx, d_flowy); |
||||
} |
||||
|
||||
{ |
||||
const int64 start = getTickCount(); |
||||
|
||||
lk.dense(d_frame0, d_frame1, d_flowx, d_flowy); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "LK : " << timeSec << " sec" << endl; |
||||
|
||||
showFlow("LK", d_flowx, d_flowy); |
||||
} |
||||
|
||||
{ |
||||
const int64 start = getTickCount(); |
||||
|
||||
farn(d_frame0, d_frame1, d_flowx, d_flowy); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "Farn : " << timeSec << " sec" << endl; |
||||
|
||||
showFlow("Farn", d_flowx, d_flowy); |
||||
} |
||||
|
||||
{ |
||||
const int64 start = getTickCount(); |
||||
|
||||
tvl1(d_frame0, d_frame1, d_flowx, d_flowy); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "TVL1 : " << timeSec << " sec" << endl; |
||||
|
||||
showFlow("TVL1", d_flowx, d_flowy); |
||||
} |
||||
|
||||
{ |
||||
const int64 start = getTickCount(); |
||||
|
||||
GpuMat buf; |
||||
calcOpticalFlowBM(d_frame0, d_frame1, Size(7, 7), Size(1, 1), Size(21, 21), false, d_flowx, d_flowy, buf); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "BM : " << timeSec << " sec" << endl; |
||||
|
||||
showFlow("BM", d_flowx, d_flowy); |
||||
} |
||||
|
||||
{ |
||||
const int64 start = getTickCount(); |
||||
|
||||
fastBM(d_frame0, d_frame1, d_flowx, d_flowy); |
||||
|
||||
const double timeSec = (getTickCount() - start) / getTickFrequency(); |
||||
cout << "Fast BM : " << timeSec << " sec" << endl; |
||||
|
||||
showFlow("Fast BM", d_flowx, d_flowy); |
||||
} |
||||
|
||||
imshow("Frame 0", frame0); |
||||
imshow("Frame 1", frame1); |
||||
waitKey(); |
||||
|
||||
return 0; |
||||
} |
Loading…
Reference in new issue