* use different approaches -> threads and streams * clean up codepull/1355/head
parent
cd5b8af609
commit
d87eb75904
1 changed files with 448 additions and 101 deletions
@ -1,149 +1,496 @@ |
|||||||
/* This sample demonstrates working on one piece of data using two GPUs.
|
// This sample demonstrates working on one piece of data using two GPUs.
|
||||||
It splits input into two parts and processes them separately on different |
// It splits input into two parts and processes them separately on different GPUs.
|
||||||
GPUs. */ |
|
||||||
|
|
||||||
// Disable some warnings which are caused with CUDA headers
|
#ifdef WIN32 |
||||||
#if defined(_MSC_VER) |
#define NOMINMAX |
||||||
#pragma warning(disable: 4201 4408 4100) |
#include <windows.h> |
||||||
|
#else |
||||||
|
#include <pthread.h> |
||||||
|
#include <unistd.h> |
||||||
#endif |
#endif |
||||||
|
|
||||||
#include <iostream> |
#include <iostream> |
||||||
#include "cvconfig.h" |
#include <iomanip> |
||||||
|
|
||||||
#include "opencv2/core/core.hpp" |
#include "opencv2/core/core.hpp" |
||||||
#include "opencv2/highgui/highgui.hpp" |
#include "opencv2/highgui/highgui.hpp" |
||||||
|
#include "opencv2/imgproc/imgproc.hpp" |
||||||
|
#include "opencv2/contrib/contrib.hpp" |
||||||
#include "opencv2/gpu/gpu.hpp" |
#include "opencv2/gpu/gpu.hpp" |
||||||
|
|
||||||
#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) |
using namespace std; |
||||||
|
using namespace cv; |
||||||
|
using namespace cv::gpu; |
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// Thread
|
||||||
|
// OS-specific wrappers for multi-threading
|
||||||
|
|
||||||
int main() |
#ifdef WIN32 |
||||||
|
class Thread |
||||||
{ |
{ |
||||||
#if !defined(HAVE_CUDA) |
struct UserData |
||||||
std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n"; |
{ |
||||||
#endif |
void (*func)(void* userData); |
||||||
|
void* param; |
||||||
|
}; |
||||||
|
|
||||||
|
static DWORD WINAPI WinThreadFunction(LPVOID lpParam) |
||||||
|
{ |
||||||
|
UserData* userData = static_cast<UserData*>(lpParam); |
||||||
|
|
||||||
|
userData->func(userData->param); |
||||||
|
|
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
UserData userData_; |
||||||
|
HANDLE thread_; |
||||||
|
DWORD threadId_; |
||||||
|
|
||||||
|
public: |
||||||
|
Thread(void (*func)(void* userData), void* userData) |
||||||
|
{ |
||||||
|
userData_.func = func; |
||||||
|
userData_.param = userData; |
||||||
|
|
||||||
|
thread_ = CreateThread( |
||||||
|
NULL, // default security attributes
|
||||||
|
0, // use default stack size
|
||||||
|
WinThreadFunction, // thread function name
|
||||||
|
&userData_, // argument to thread function
|
||||||
|
0, // use default creation flags
|
||||||
|
&threadId_); // returns the thread identifier
|
||||||
|
} |
||||||
|
|
||||||
|
~Thread() |
||||||
|
{ |
||||||
|
CloseHandle(thread_); |
||||||
|
} |
||||||
|
|
||||||
|
void wait() |
||||||
|
{ |
||||||
|
WaitForSingleObject(thread_, INFINITE); |
||||||
|
} |
||||||
|
}; |
||||||
|
#else |
||||||
|
// Thin wrapper around a POSIX thread.
//
// The constructor starts a new thread that calls func(userData); wait() blocks
// until that thread has finished. The thread receives a pointer to a member of
// this object, so callers should wait() before the Thread object is destroyed.
class Thread
{
    // Callback and its argument, handed to the pthread entry point.
    struct UserData
    {
        void (*func)(void* userData);
        void* param;
    };

    // Entry point with the signature pthread_create expects; forwards to the user callback.
    static void* PThreadFunction(void* lpParam)
    {
        UserData* userData = static_cast<UserData*>(lpParam);

        userData->func(userData->param);

        return 0;
    }

    pthread_t thread_;
    UserData userData_;
    bool joinable_; // true while thread_ refers to a started thread that was neither joined nor detached

    // Not copyable: two objects must never join/detach the same pthread_t.
    // Declared, never defined.
    Thread(const Thread&);
    Thread& operator=(const Thread&);

public:
    // Starts the thread immediately. If pthread_create fails, the object is
    // left non-joinable and wait() / the destructor become no-ops.
    Thread(void (*func)(void* userData), void* userData) : joinable_(false)
    {
        userData_.func = func;
        userData_.param = userData;

        joinable_ = (pthread_create(&thread_, NULL, PThreadFunction, &userData_) == 0);
    }

    // Detaches the thread only if it was never joined. Detaching a thread that
    // wait() already joined is undefined behaviour, so it is skipped in that case.
    ~Thread()
    {
        if (joinable_)
            pthread_detach(thread_);
    }

    // Blocks until the thread has finished. Safe to call more than once.
    void wait()
    {
        if (joinable_)
        {
            pthread_join(thread_, NULL);
            joinable_ = false;
        }
    }
};
||||||
#endif |
#endif |
||||||
|
|
||||||
return 0; |
///////////////////////////////////////////////////////////
|
||||||
|
// StereoSingleGpu
|
||||||
|
// Run Stereo algorithm on single GPU
|
||||||
|
|
||||||
|
class StereoSingleGpu |
||||||
|
{ |
||||||
|
public: |
||||||
|
explicit StereoSingleGpu(int deviceId = 0); |
||||||
|
~StereoSingleGpu(); |
||||||
|
|
||||||
|
void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity); |
||||||
|
|
||||||
|
private: |
||||||
|
int deviceId_; |
||||||
|
GpuMat d_leftFrame; |
||||||
|
GpuMat d_rightFrame; |
||||||
|
GpuMat d_disparity; |
||||||
|
Ptr<StereoBM_GPU> d_alg; |
||||||
|
}; |
||||||
|
|
||||||
|
// Remembers the device index, makes that device current and creates the
// matcher while it is current.
StereoSingleGpu::StereoSingleGpu(int deviceId)
    : deviceId_(deviceId)
{
    gpu::setDevice(deviceId_);

    const int numDisparities = 256; // second StereoBM_GPU constructor argument
    d_alg = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, numDisparities);
}
||||||
|
|
||||||
#else |
// Releases the device buffers and the matcher with the owning device current.
StereoSingleGpu::~StereoSingleGpu()
{
    gpu::setDevice(deviceId_);

    // Same release order as the member declarations: left, right, disparity.
    GpuMat* const buffers[] = { &d_leftFrame, &d_rightFrame, &d_disparity };
    for (int k = 0; k < 3; ++k)
        buffers[k]->release();

    d_alg.release();
}
||||||
|
|
||||||
#include "opencv2/core/internal.hpp" // For TBB wrappers |
void StereoSingleGpu::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity) |
||||||
|
{ |
||||||
|
gpu::setDevice(deviceId_); |
||||||
|
d_leftFrame.upload(leftFrame); |
||||||
|
d_rightFrame.upload(rightFrame); |
||||||
|
(*d_alg)(d_leftFrame, d_rightFrame, d_disparity); |
||||||
|
d_disparity.download(disparity); |
||||||
|
} |
||||||
|
|
||||||
using namespace std; |
///////////////////////////////////////////////////////////
|
||||||
using namespace cv; |
// StereoMultiGpuThread
|
||||||
using namespace cv::gpu; |
// Run Stereo algorithm on two GPUs using different host threads
|
||||||
|
|
||||||
|
class StereoMultiGpuThread |
||||||
|
{ |
||||||
|
public: |
||||||
|
StereoMultiGpuThread(); |
||||||
|
~StereoMultiGpuThread(); |
||||||
|
|
||||||
|
void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity); |
||||||
|
|
||||||
|
private: |
||||||
|
GpuMat d_leftFrames[2]; |
||||||
|
GpuMat d_rightFrames[2]; |
||||||
|
GpuMat d_disparities[2]; |
||||||
|
Ptr<StereoBM_GPU> d_algs[2]; |
||||||
|
|
||||||
|
struct StereoLaunchData |
||||||
|
{ |
||||||
|
int deviceId; |
||||||
|
Mat leftFrame; |
||||||
|
Mat rightFrame; |
||||||
|
Mat disparity; |
||||||
|
GpuMat* d_leftFrame; |
||||||
|
GpuMat* d_rightFrame; |
||||||
|
GpuMat* d_disparity; |
||||||
|
Ptr<StereoBM_GPU> d_alg; |
||||||
|
}; |
||||||
|
|
||||||
|
static void launchGpuStereoAlg(void* userData); |
||||||
|
}; |
||||||
|
|
||||||
|
// Creates one matcher per device (0, then 1), each while its device is current.
StereoMultiGpuThread::StereoMultiGpuThread()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        gpu::setDevice(dev);
        d_algs[dev] = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, 256);
    }
}
||||||
|
|
||||||
|
// Releases each device's buffers and matcher (device 0, then 1) with that
// device current.
StereoMultiGpuThread::~StereoMultiGpuThread()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        gpu::setDevice(dev);

        d_leftFrames[dev].release();
        d_rightFrames[dev].release();
        d_disparities[dev].release();
        d_algs[dev].release();
    }
}
||||||
|
|
||||||
|
void StereoMultiGpuThread::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity) |
||||||
|
{ |
||||||
|
disparity.create(leftFrame.size(), CV_8UC1); |
||||||
|
|
||||||
|
// Split input data onto two parts for each GPUs.
|
||||||
|
// We add small border for each part,
|
||||||
|
// because original algorithm doesn't calculate disparity on image borders.
|
||||||
|
// With such padding we will get output in the middle of final result.
|
||||||
|
|
||||||
|
StereoLaunchData launchDatas[2]; |
||||||
|
|
||||||
|
launchDatas[0].deviceId = 0; |
||||||
|
launchDatas[0].leftFrame = leftFrame.rowRange(0, leftFrame.rows / 2 + 32); |
||||||
|
launchDatas[0].rightFrame = rightFrame.rowRange(0, rightFrame.rows / 2 + 32); |
||||||
|
launchDatas[0].disparity = disparity.rowRange(0, leftFrame.rows / 2); |
||||||
|
launchDatas[0].d_leftFrame = &d_leftFrames[0]; |
||||||
|
launchDatas[0].d_rightFrame = &d_rightFrames[0]; |
||||||
|
launchDatas[0].d_disparity = &d_disparities[0]; |
||||||
|
launchDatas[0].d_alg = d_algs[0]; |
||||||
|
|
||||||
|
launchDatas[1].deviceId = 1; |
||||||
|
launchDatas[1].leftFrame = leftFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows); |
||||||
|
launchDatas[1].rightFrame = rightFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows); |
||||||
|
launchDatas[1].disparity = disparity.rowRange(leftFrame.rows / 2, leftFrame.rows); |
||||||
|
launchDatas[1].d_leftFrame = &d_leftFrames[1]; |
||||||
|
launchDatas[1].d_rightFrame = &d_rightFrames[1]; |
||||||
|
launchDatas[1].d_disparity = &d_disparities[1]; |
||||||
|
launchDatas[1].d_alg = d_algs[1]; |
||||||
|
|
||||||
struct Worker { void operator()(int device_id) const; }; |
Thread thread0(launchGpuStereoAlg, &launchDatas[0]); |
||||||
|
Thread thread1(launchGpuStereoAlg, &launchDatas[1]); |
||||||
|
|
||||||
// GPUs data
|
thread0.wait(); |
||||||
GpuMat d_left[2]; |
thread1.wait(); |
||||||
GpuMat d_right[2]; |
} |
||||||
StereoBM_GPU* bm[2]; |
|
||||||
GpuMat d_result[2]; |
|
||||||
|
|
||||||
static void printHelp() |
void StereoMultiGpuThread::launchGpuStereoAlg(void* userData) |
||||||
{ |
{ |
||||||
std::cout << "Usage: stereo_multi_gpu --left <image> --right <image>\n"; |
StereoLaunchData* data = static_cast<StereoLaunchData*>(userData); |
||||||
|
|
||||||
|
gpu::setDevice(data->deviceId); |
||||||
|
data->d_leftFrame->upload(data->leftFrame); |
||||||
|
data->d_rightFrame->upload(data->rightFrame); |
||||||
|
(*data->d_alg)(*data->d_leftFrame, *data->d_rightFrame, *data->d_disparity); |
||||||
|
|
||||||
|
if (data->deviceId == 0) |
||||||
|
data->d_disparity->rowRange(0, data->d_disparity->rows - 32).download(data->disparity); |
||||||
|
else |
||||||
|
data->d_disparity->rowRange(32, data->d_disparity->rows).download(data->disparity); |
||||||
} |
} |
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// StereoMultiGpuStream
|
||||||
|
// Run Stereo algorithm on two GPUs from single host thread using async API
|
||||||
|
|
||||||
|
class StereoMultiGpuStream |
||||||
|
{ |
||||||
|
public: |
||||||
|
StereoMultiGpuStream(); |
||||||
|
~StereoMultiGpuStream(); |
||||||
|
|
||||||
|
void compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity); |
||||||
|
|
||||||
|
private: |
||||||
|
GpuMat d_leftFrames[2]; |
||||||
|
GpuMat d_rightFrames[2]; |
||||||
|
GpuMat d_disparities[2]; |
||||||
|
Ptr<StereoBM_GPU> d_algs[2]; |
||||||
|
Ptr<Stream> streams[2]; |
||||||
|
}; |
||||||
|
|
||||||
|
// Creates one matcher and one stream per device (0, then 1), each while its
// device is current.
StereoMultiGpuStream::StereoMultiGpuStream()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        gpu::setDevice(dev);

        d_algs[dev] = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, 256);
        streams[dev] = new Stream;
    }
}
||||||
|
|
||||||
|
// Releases each device's buffers, matcher and stream (device 0, then 1) with
// that device current.
StereoMultiGpuStream::~StereoMultiGpuStream()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        gpu::setDevice(dev);

        d_leftFrames[dev].release();
        d_rightFrames[dev].release();
        d_disparities[dev].release();
        d_algs[dev].release();
        streams[dev].release();
    }
}
||||||
|
|
||||||
|
void StereoMultiGpuStream::compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity) |
||||||
|
{ |
||||||
|
disparity.create(leftFrame.size(), CV_8UC1); |
||||||
|
|
||||||
|
// Split input data onto two parts for each GPUs.
|
||||||
|
// We add small border for each part,
|
||||||
|
// because original algorithm doesn't calculate disparity on image borders.
|
||||||
|
// With such padding we will get output in the middle of final result.
|
||||||
|
|
||||||
|
Mat leftFrameHdr = leftFrame.createMatHeader(); |
||||||
|
Mat rightFrameHdr = rightFrame.createMatHeader(); |
||||||
|
Mat disparityHdr = disparity.createMatHeader(); |
||||||
|
Mat disparityPart0 = disparityHdr.rowRange(0, leftFrame.rows / 2); |
||||||
|
Mat disparityPart1 = disparityHdr.rowRange(leftFrame.rows / 2, leftFrame.rows); |
||||||
|
|
||||||
|
gpu::setDevice(0); |
||||||
|
streams[0]->enqueueUpload(leftFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), d_leftFrames[0]); |
||||||
|
streams[0]->enqueueUpload(rightFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), d_rightFrames[0]); |
||||||
|
(*d_algs[0])(d_leftFrames[0], d_rightFrames[0], d_disparities[0], *streams[0]); |
||||||
|
streams[0]->enqueueDownload(d_disparities[0].rowRange(0, leftFrame.rows / 2), disparityPart0); |
||||||
|
|
||||||
|
gpu::setDevice(1); |
||||||
|
streams[1]->enqueueUpload(leftFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), d_leftFrames[1]); |
||||||
|
streams[1]->enqueueUpload(rightFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), d_rightFrames[1]); |
||||||
|
(*d_algs[1])(d_leftFrames[1], d_rightFrames[1], d_disparities[1], *streams[1]); |
||||||
|
streams[1]->enqueueDownload(d_disparities[1].rowRange(32, d_disparities[1].rows), disparityPart1); |
||||||
|
|
||||||
|
gpu::setDevice(0); |
||||||
|
streams[0]->waitForCompletion(); |
||||||
|
|
||||||
|
gpu::setDevice(1); |
||||||
|
streams[1]->waitForCompletion(); |
||||||
|
} |
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// main
|
||||||
|
|
||||||
int main(int argc, char** argv) |
int main(int argc, char** argv) |
||||||
{ |
{ |
||||||
if (argc < 5) |
if (argc != 3) |
||||||
{ |
{ |
||||||
printHelp(); |
cerr << "Usage: stereo_multi_gpu <left_video> <right_video>" << endl; |
||||||
return -1; |
return -1; |
||||||
} |
} |
||||||
|
|
||||||
int num_devices = getCudaEnabledDeviceCount(); |
const int numDevices = getCudaEnabledDeviceCount(); |
||||||
if (num_devices < 2) |
if (numDevices != 2) |
||||||
{ |
{ |
||||||
std::cout << "Two or more GPUs are required\n"; |
cerr << "Two GPUs are required" << endl; |
||||||
return -1; |
return -1; |
||||||
} |
} |
||||||
for (int i = 0; i < num_devices; ++i) |
|
||||||
{ |
|
||||||
cv::gpu::printShortCudaDeviceInfo(i); |
|
||||||
|
|
||||||
DeviceInfo dev_info(i); |
for (int i = 0; i < numDevices; ++i) |
||||||
if (!dev_info.isCompatible()) |
{ |
||||||
|
DeviceInfo devInfo(i); |
||||||
|
if (!devInfo.isCompatible()) |
||||||
{ |
{ |
||||||
std::cout << "GPU module isn't built for GPU #" << i << " (" |
cerr << "GPU module was't built for GPU #" << i << " (" |
||||||
<< dev_info.name() << ", CC " << dev_info.majorVersion() |
<< devInfo.name() << ", CC " << devInfo.majorVersion() |
||||||
<< dev_info.minorVersion() << "\n"; |
<< devInfo.minorVersion() << endl; |
||||||
return -1; |
return -1; |
||||||
} |
} |
||||||
|
|
||||||
|
printShortCudaDeviceInfo(i); |
||||||
} |
} |
||||||
|
|
||||||
// Load input data
|
VideoCapture leftVideo(argv[1]); |
||||||
Mat left, right; |
VideoCapture rightVideo(argv[2]); |
||||||
for (int i = 1; i < argc; ++i) |
|
||||||
|
if (!leftVideo.isOpened()) |
||||||
{ |
{ |
||||||
if (string(argv[i]) == "--left") |
cerr << "Can't open " << argv[1] << " video file" << endl; |
||||||
{ |
return -1; |
||||||
left = imread(argv[++i], CV_LOAD_IMAGE_GRAYSCALE); |
} |
||||||
CV_Assert(!left.empty()); |
|
||||||
} |
if (!rightVideo.isOpened()) |
||||||
else if (string(argv[i]) == "--right") |
{ |
||||||
{ |
cerr << "Can't open " << argv[2] << " video file" << endl; |
||||||
right = imread(argv[++i], CV_LOAD_IMAGE_GRAYSCALE); |
return -1; |
||||||
CV_Assert(!right.empty()); |
} |
||||||
} |
|
||||||
else if (string(argv[i]) == "--help") |
cout << endl; |
||||||
|
cout << "This sample demonstrates working on one piece of data using two GPUs." << endl; |
||||||
|
cout << "It splits input into two parts and processes them separately on different GPUs." << endl; |
||||||
|
cout << endl; |
||||||
|
|
||||||
|
Mat leftFrame, rightFrame; |
||||||
|
CudaMem leftGrayFrame, rightGrayFrame; |
||||||
|
|
||||||
|
StereoSingleGpu gpu0Alg(0); |
||||||
|
StereoSingleGpu gpu1Alg(1); |
||||||
|
StereoMultiGpuThread multiThreadAlg; |
||||||
|
StereoMultiGpuStream multiStreamAlg; |
||||||
|
|
||||||
|
Mat disparityGpu0; |
||||||
|
Mat disparityGpu1; |
||||||
|
Mat disparityMultiThread; |
||||||
|
CudaMem disparityMultiStream; |
||||||
|
|
||||||
|
Mat disparityGpu0Show; |
||||||
|
Mat disparityGpu1Show; |
||||||
|
Mat disparityMultiThreadShow; |
||||||
|
Mat disparityMultiStreamShow; |
||||||
|
|
||||||
|
TickMeter tm; |
||||||
|
|
||||||
|
cout << "-------------------------------------------------------------------" << endl; |
||||||
|
cout << "| Frame | GPU 0 ms | GPU 1 ms | Multi Thread ms | Multi Stream ms |" << endl; |
||||||
|
cout << "-------------------------------------------------------------------" << endl; |
||||||
|
|
||||||
|
for (int i = 0;; ++i) |
||||||
|
{ |
||||||
|
leftVideo >> leftFrame; |
||||||
|
rightVideo >> rightFrame; |
||||||
|
|
||||||
|
if (leftFrame.empty() || rightFrame.empty()) |
||||||
|
break; |
||||||
|
|
||||||
|
if (leftFrame.size() != rightFrame.size()) |
||||||
{ |
{ |
||||||
printHelp(); |
cerr << "Frames have different sizes" << endl; |
||||||
return -1; |
return -1; |
||||||
} |
} |
||||||
} |
|
||||||
|
|
||||||
// Split source images for processing on the GPU #0
|
leftGrayFrame.create(leftFrame.size(), CV_8UC1); |
||||||
setDevice(0); |
rightGrayFrame.create(leftFrame.size(), CV_8UC1); |
||||||
d_left[0].upload(left.rowRange(0, left.rows / 2)); |
|
||||||
d_right[0].upload(right.rowRange(0, right.rows / 2)); |
|
||||||
bm[0] = new StereoBM_GPU(); |
|
||||||
|
|
||||||
// Split source images for processing on the GPU #1
|
|
||||||
setDevice(1); |
|
||||||
d_left[1].upload(left.rowRange(left.rows / 2, left.rows)); |
|
||||||
d_right[1].upload(right.rowRange(right.rows / 2, right.rows)); |
|
||||||
bm[1] = new StereoBM_GPU(); |
|
||||||
|
|
||||||
// Execute calculation in two threads using two GPUs
|
|
||||||
int devices[] = {0, 1}; |
|
||||||
parallel_do(devices, devices + 2, Worker()); |
|
||||||
|
|
||||||
// Release the first GPU resources
|
|
||||||
setDevice(0); |
|
||||||
imshow("GPU #0 result", Mat(d_result[0])); |
|
||||||
d_left[0].release(); |
|
||||||
d_right[0].release(); |
|
||||||
d_result[0].release(); |
|
||||||
delete bm[0]; |
|
||||||
|
|
||||||
// Release the second GPU resources
|
|
||||||
setDevice(1); |
|
||||||
imshow("GPU #1 result", Mat(d_result[1])); |
|
||||||
d_left[1].release(); |
|
||||||
d_right[1].release(); |
|
||||||
d_result[1].release(); |
|
||||||
delete bm[1]; |
|
||||||
|
|
||||||
waitKey(); |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
|
cvtColor(leftFrame, leftGrayFrame.createMatHeader(), COLOR_BGR2GRAY); |
||||||
|
cvtColor(rightFrame, rightGrayFrame.createMatHeader(), COLOR_BGR2GRAY); |
||||||
|
|
||||||
void Worker::operator()(int device_id) const |
tm.reset(); tm.start(); |
||||||
{ |
gpu0Alg.compute(leftGrayFrame, rightGrayFrame, disparityGpu0); |
||||||
setDevice(device_id); |
tm.stop(); |
||||||
|
|
||||||
bm[device_id]->operator()(d_left[device_id], d_right[device_id], |
const double gpu0Time = tm.getTimeMilli(); |
||||||
d_result[device_id]); |
|
||||||
|
|
||||||
std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() |
tm.reset(); tm.start(); |
||||||
<< "): finished\n"; |
gpu1Alg.compute(leftGrayFrame, rightGrayFrame, disparityGpu1); |
||||||
} |
tm.stop(); |
||||||
|
|
||||||
#endif |
const double gpu1Time = tm.getTimeMilli(); |
||||||
|
|
||||||
|
tm.reset(); tm.start(); |
||||||
|
multiThreadAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiThread); |
||||||
|
tm.stop(); |
||||||
|
|
||||||
|
const double multiThreadTime = tm.getTimeMilli(); |
||||||
|
|
||||||
|
tm.reset(); tm.start(); |
||||||
|
multiStreamAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiStream); |
||||||
|
tm.stop(); |
||||||
|
|
||||||
|
const double multiStreamTime = tm.getTimeMilli(); |
||||||
|
|
||||||
|
cout << "| " << setw(5) << i << " | " |
||||||
|
<< setw(8) << setprecision(1) << fixed << gpu0Time << " | " |
||||||
|
<< setw(8) << setprecision(1) << fixed << gpu1Time << " | " |
||||||
|
<< setw(15) << setprecision(1) << fixed << multiThreadTime << " | " |
||||||
|
<< setw(15) << setprecision(1) << fixed << multiStreamTime << " |" << endl; |
||||||
|
|
||||||
|
resize(disparityGpu0, disparityGpu0Show, Size(1024, 768), 0, 0, INTER_AREA); |
||||||
|
resize(disparityGpu1, disparityGpu1Show, Size(1024, 768), 0, 0, INTER_AREA); |
||||||
|
resize(disparityMultiThread, disparityMultiThreadShow, Size(1024, 768), 0, 0, INTER_AREA); |
||||||
|
resize(disparityMultiStream.createMatHeader(), disparityMultiStreamShow, Size(1024, 768), 0, 0, INTER_AREA); |
||||||
|
|
||||||
|
imshow("disparityGpu0", disparityGpu0Show); |
||||||
|
imshow("disparityGpu1", disparityGpu1Show); |
||||||
|
imshow("disparityMultiThread", disparityMultiThreadShow); |
||||||
|
imshow("disparityMultiStream", disparityMultiStreamShow); |
||||||
|
|
||||||
|
const int key = waitKey(30) & 0xff; |
||||||
|
if (key == 27) |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
cout << "-------------------------------------------------------------------" << endl; |
||||||
|
|
||||||
|
return 0; |
||||||
|
} |
||||||
|
Loading…
Reference in new issue