Merge pull request #2536 from vchiluka5:NVOF_Bug_Fixes

pull/2540/head
Alexander Alekhin 5 years ago
commit 1311b05747
  1. 14
      modules/cudaoptflow/include/opencv2/cudaoptflow.hpp
  2. 4
      modules/cudaoptflow/samples/optical_flow.cpp
  3. 86
      modules/cudaoptflow/src/nvidiaOpticalFlow.cpp

@ -118,7 +118,9 @@ public:
@param inputImage Input image.
@param referenceImage Reference image of the same size and the same type as input image.
@param flow A buffer consisting of inputImage.Size() / getGridSize() flow vectors in CV_16SC2 format.
@param stream Stream for the asynchronous version.
@param stream It is highly recommended that CUDA streams for pre and post processing of optical flow vectors should be set once per session in create() function as a part of optical flow session creation.
This parameter is left here for backward compatibility and may be removed in the future.
Default value is NULL stream;
@param hint Hint buffer if client provides external hints. Must have same size as flow buffer.
Caller can provide flow vectors as hints for optical flow calculation.
@param cost Cost buffer contains numbers indicating the confidence associated with each of the generated flow vectors.
@ -435,6 +437,12 @@ public:
@param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false.
@param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false.
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
@param inputStream Optical flow algorithm may optionally involve cuda preprocessing on the input buffers.
The input cuda stream can be used to pipeline and synchronize the cuda preprocessing tasks with OF HW engine.
If input stream is not set, the execute function will use default stream which is NULL stream;
@param outputStream Optical flow algorithm may optionally involve cuda post processing on the output flow vectors.
The output cuda stream can be used to pipeline and synchronize the cuda post processing tasks with OF HW engine.
If output stream is not set, the execute function will use default stream which is NULL stream;
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_1_0> create(
int width,
@ -444,7 +452,9 @@ public:
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,
int gpuId = 0);
int gpuId = 0,
Stream& inputStream = Stream::Null(),
Stream& outputStream = Stream::Null());
};
//! @}

@ -181,8 +181,8 @@ int main(int argc, const char* argv[])
Ptr<cuda::DensePyrLKOpticalFlow> lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7));
Ptr<cuda::FarnebackOpticalFlow> farn = cuda::FarnebackOpticalFlow::create();
Ptr<cuda::OpticalFlowDual_TVL1> tvl1 = cuda::OpticalFlowDual_TVL1::create();
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof = cuda::NvidiaOpticalFlow_1_0::create(
frame0.size().width, frame0.size().height, NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST);
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof = cuda::NvidiaOpticalFlow_1_0::create(frame0.size().width, frame0.size().height,
NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, false, false, false, 0, Stream(), Stream());
{
GpuMat d_frame0f;

@ -8,11 +8,11 @@
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int) { throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); }
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) { throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); }
#elif !defined HAVE_NVIDIA_OPTFLOW
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int)
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&)
{
CV_Error(cv::Error::HeaderIsNull, "OpenCV was build without NVIDIA OpticalFlow support");
}
@ -236,6 +236,8 @@ private:
bool m_enableExternalHints;
bool m_enableCostBuffer;
int m_gpuId;
Stream m_inputStream;
Stream m_outputStream;
CUcontext m_cuContext;
NV_OF_BUFFER_FORMAT m_format;
@ -286,13 +288,14 @@ protected:
std::mutex m_lock;
public:
NvidiaOpticalFlowImpl(int width, int height, NV_OF_PERF_LEVEL perfPreset,
bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId);
NvidiaOpticalFlowImpl(int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream);
virtual void calc(InputArray inputImage, InputArray referenceImage,
InputOutputArray flow, Stream& stream = Stream::Null(),
InputArray hint = cv::noArray(), OutputArray cost = cv::noArray());
virtual void collectGarbage();
virtual void upSampler(InputArray flow, int width, int height,
@ -303,11 +306,13 @@ public:
NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId) :
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId,
Stream inputStream, Stream outputStream) :
m_width(width), m_height(height), m_preset(perfPreset),
m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints),
m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints),
m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId),
m_inputStream(inputStream), m_outputStream(outputStream),
m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
{
@ -387,6 +392,12 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
NVOF_API_CALL(GetAPI()->nvOFInit(GetHandle(), &m_initParams));
if (m_inputStream || m_outputStream)
{
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream)));
}
//Input Buffer 1
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
&m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hInputBuffer));
@ -432,13 +443,12 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow,
Stream& stream, InputArray hint, OutputArray cost)
{
Stream inputStream = {};
Stream outputStream = {};
if (stream)
inputStream = stream;
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
StreamAccessor::getStream(inputStream), StreamAccessor::getStream(outputStream)));
if (stream && !m_inputStream)
{
m_inputStream = stream;
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream)));
}
GpuMat frame0GpuMat(_frame0.size(), _frame0.type(), (void*)m_frame0cuDevPtr,
m_inputBufferStrideInfo.strideInfo[0].strideXInBytes);
@ -452,12 +462,14 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
if (_frame0.isMat())
{
//Get Mats from InputArrays
frame0GpuMat.upload(_frame0);
Mat __frame0 = _frame0.getMat();
frame0GpuMat.upload(__frame0, m_inputStream);
}
else if (_frame0.isGpuMat())
{
//Get GpuMats from InputArrays
_frame0.copyTo(frame0GpuMat);
GpuMat __frame0 = _frame0.getGpuMat();
__frame0.copyTo(frame0GpuMat, m_inputStream);
}
else
{
@ -469,12 +481,14 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
if (_frame1.isMat())
{
//Get Mats from InputArrays
frame1GpuMat.upload(_frame1);
Mat __frame1 = _frame1.getMat();
frame1GpuMat.upload(__frame1, m_inputStream);
}
else if (_frame1.isGpuMat())
{
//Get GpuMats from InputArrays
_frame1.copyTo(frame1GpuMat);
GpuMat __frame1 = _frame1.getGpuMat();
__frame1.copyTo(frame1GpuMat, m_inputStream);
}
else
{
@ -490,12 +504,14 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
if (hint.isMat())
{
//Get Mat from InputArray hint
hintGpuMat.upload(hint);
Mat _hint = hint.getMat();
hintGpuMat.upload(_hint, m_inputStream);
}
else if(hint.isGpuMat())
{
//Get GpuMat from InputArray hint
hint.copyTo(hintGpuMat);
GpuMat _hint = hint.getGpuMat();
_hint.copyTo(hintGpuMat, m_inputStream);
}
else
{
@ -503,8 +519,6 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
}
}
inputStream.waitForCompletion();
//Execute Call
NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
NV_OF_EXECUTE_OUTPUT_PARAMS exeOutParams;
@ -518,15 +532,13 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
memset(&exeOutParams, 0, sizeof(exeOutParams));
exeOutParams.outputBuffer = m_hOutputBuffer;
exeOutParams.outputCostBuffer = m_initParams.enableOutputCost == NV_OF_TRUE ?
m_hCostBuffer : nullptr;;
m_hCostBuffer : nullptr;
NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));
outputStream.waitForCompletion();
if (_flow.isMat())
flowXYGpuMat.download(_flow);
flowXYGpuMat.download(_flow, m_outputStream);
else if(_flow.isGpuMat())
flowXYGpuMat.copyTo(_flow);
flowXYGpuMat.copyTo(_flow, m_outputStream);
else
CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat");
@ -537,13 +549,13 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
m_costBufferStrideInfo.strideInfo[0].strideXInBytes);
if (cost.isMat())
costGpuMat.download(cost);
costGpuMat.download(cost, m_outputStream);
else if(cost.isGpuMat())
costGpuMat.copyTo(cost);
costGpuMat.copyTo(cost, m_outputStream);
else
CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
}
cuSafeCall(cudaDeviceSynchronize());
m_outputStream.waitForCompletion();
}
void NvidiaOpticalFlowImpl::collectGarbage()
@ -574,6 +586,14 @@ void NvidiaOpticalFlowImpl::collectGarbage()
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer));
}
}
if (m_inputStream)
{
m_inputStream.waitForCompletion();
}
if (m_outputStream)
{
m_outputStream.waitForCompletion();
}
if (m_hOF)
{
NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF));
@ -586,7 +606,8 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
Mat flow;
if (_flow.isMat())
{
_flow.copyTo(flow);
Mat __flow = _flow.getMat();
__flow.copyTo(flow);
}
else if (_flow.isGpuMat())
{
@ -640,7 +661,8 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
int width, int height, NVIDIA_OF_PERF_LEVEL perfPreset,
bool bEnableTemporalHints, bool bEnableExternalHints,
bool bEnableCostBuffer, int gpuId)
bool bEnableCostBuffer, int gpuId,
Stream& inputStream, Stream& outputStream)
{
return makePtr<NvidiaOpticalFlowImpl>(
width,
@ -649,6 +671,8 @@ Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
bEnableTemporalHints,
bEnableExternalHints,
bEnableCostBuffer,
gpuId);
gpuId,
inputStream,
outputStream);
}
#endif

Loading…
Cancel
Save