NVIDIA_OPTICAL_FLOW_2_0_INTEGRATION

pull/2807/head
Vishal Chiluka 5 years ago committed by Vishal Bhaskar Chiluka
parent 0b6b8ff9dd
commit 582fe44b7a
  1. 20
      modules/cudaoptflow/CMakeLists.txt
  2. 150
      modules/cudaoptflow/include/opencv2/cudaoptflow.hpp
  3. 36
      modules/cudaoptflow/misc/python/test/test_nvidiaopticalflow.py
  4. 70
      modules/cudaoptflow/perf/perf_optflow.cpp
  5. 168
      modules/cudaoptflow/samples/nvidia_optical_flow.cpp
  6. 33
      modules/cudaoptflow/samples/optical_flow.cpp
  7. 99
      modules/cudaoptflow/src/cuda/nvidiaOpticalFlow.cu
  8. 658
      modules/cudaoptflow/src/nvidiaOpticalFlow.cpp
  9. 114
      modules/cudaoptflow/test/test_optflow.cpp

@ -8,21 +8,21 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-d
ocv_define_module(cudaoptflow opencv_video opencv_optflow opencv_cudaarithm opencv_cudawarping opencv_cudaimgproc OPTIONAL opencv_cudalegacy WRAP python)
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT "79c6cee80a2df9a196f20afd6b598a9810964c32")
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5 "ca5acedee6cb45d0ec610a6732de5c15")
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_1_0_Headers")
ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}.zip"
HASH ${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5}
set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT "edb50da3cf849840d680249aa6dbef248ebce2ca")
set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_MD5 "a73cd48b18dcc0cc8933b30796074191")
set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_2_0_Headers")
ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT}.zip"
HASH ${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_MD5}
URL
"https://github.com/NVIDIA/NVIDIAOpticalFlowSDK/archive/"
DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}"
STATUS NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS
DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH}"
STATUS NVIDIA_OPTICAL_FLOW_2_0_HEADERS_DOWNLOAD_SUCCESS
ID "NVIDIA_OPTICAL_FLOW"
UNPACK RELATIVE_URL)
if(NOT NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS)
message(STATUS "Failed to download NVIDIA_Optical_Flow_1_0 Headers")
if(NOT NVIDIA_OPTICAL_FLOW_2_0_HEADERS_DOWNLOAD_SUCCESS)
message(STATUS "Failed to download NVIDIA_Optical_Flow_2_0 Headers")
else()
add_definitions(-DHAVE_NVIDIA_OPTFLOW=1)
ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}")
ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT}")
endif()

@ -392,9 +392,9 @@ public:
/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 1.0.
@note
- A sample application demonstrating the use of NVIDIA Optical Flow can be found at
opencv_source_code/samples/gpu/nvidia_optical_flow.cpp
opencv_contrib_source_code/modules/cudaoptflow/samples/nvidia_optical_flow.cpp
- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at
opencv_source_code/samples/gpu/optical_flow.cpp
opencv_contrib_source_code/modules/cudaoptflow/samples/optical_flow.cpp
*/
class CV_EXPORTS_W NvidiaOpticalFlow_1_0 : public NvidiaHWOpticalFlow
@ -417,18 +417,16 @@ public:
* using nearest neighbour upsampling method.
@param flow Buffer of type CV_16FC2 containing flow vectors generated by calc().
@param width Width of the input image in pixels for which these flow vectors were generated.
@param height Height of the input image in pixels for which these flow vectors were generated.
@param imageSize Size of the input image in pixels for which these flow vectors were generated.
@param gridSize Granularity of the optical flow vectors returned by calc() function. Can be queried using getGridSize().
@param upsampledFlow Buffer of type CV_32FC2, containing upsampled flow vectors, each flow vector for 1 pixel, in the pitch-linear layout.
*/
CV_WRAP virtual void upSampler(InputArray flow, int width, int height,
CV_WRAP virtual void upSampler(InputArray flow, cv::Size imageSize,
int gridSize, InputOutputArray upsampledFlow) = 0;
/** @brief Instantiate NVIDIA Optical Flow
@param width Width of input image in pixels.
@param height Height of input image in pixels.
@param imageSize Size of input image in pixels.
@param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
Defaults to NV_OF_PERF_LEVEL_SLOW.
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
@ -445,10 +443,142 @@ public:
If output stream is not set, the execute function will use default stream which is NULL stream;
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_1_0> create(
int width,
int height,
cv::Size imageSize,
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset
= cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
= cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW,
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,
int gpuId = 0,
Stream& inputStream = Stream::Null(),
Stream& outputStream = Stream::Null());
};
/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 2.0.
@note
- A sample application demonstrating the use of NVIDIA Optical Flow can be found at
opencv_contrib_source_code/modules/cudaoptflow/samples/nvidia_optical_flow.cpp
- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at
opencv_contrib_source_code/modules/cudaoptflow/samples/optical_flow.cpp
*/
class CV_EXPORTS_W NvidiaOpticalFlow_2_0 : public NvidiaHWOpticalFlow
{
public:
/**
* Supported optical flow performance levels.
*/
enum NVIDIA_OF_PERF_LEVEL
{
NV_OF_PERF_LEVEL_UNDEFINED,
NV_OF_PERF_LEVEL_SLOW = 5, /**< Slow perf level results in lowest performance and best quality */
NV_OF_PERF_LEVEL_MEDIUM = 10, /**< Medium perf level results in low performance and medium quality */
NV_OF_PERF_LEVEL_FAST = 20, /**< Fast perf level results in high performance and low quality */
NV_OF_PERF_LEVEL_MAX
};
/**
* Supported grid size for output buffer.
*/
enum NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE
{
NV_OF_OUTPUT_VECTOR_GRID_SIZE_UNDEFINED,
NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 = 1, /**< Output buffer grid size is 1x1 */
NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 = 2, /**< Output buffer grid size is 2x2 */
NV_OF_OUTPUT_VECTOR_GRID_SIZE_4 = 4, /**< Output buffer grid size is 4x4 */
NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX
};
/**
* Supported grid size for hint buffer.
*/
enum NVIDIA_OF_HINT_VECTOR_GRID_SIZE
{
NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED,
NV_OF_HINT_VECTOR_GRID_SIZE_1 = 1, /**< Hint buffer grid size is 1x1.*/
NV_OF_HINT_VECTOR_GRID_SIZE_2 = 2, /**< Hint buffer grid size is 2x2.*/
NV_OF_HINT_VECTOR_GRID_SIZE_4 = 4, /**< Hint buffer grid size is 4x4.*/
NV_OF_HINT_VECTOR_GRID_SIZE_8 = 8, /**< Hint buffer grid size is 8x8.*/
NV_OF_HINT_VECTOR_GRID_SIZE_MAX
};
/** @brief convertToFloat() helper function converts the hardware-generated flow vectors to floating point representation (1 flow vector for gridSize).
* gridSize can be queried via function getGridSize().
@param flow Buffer of type CV_16FC2 containing flow vectors generated by calc().
@param floatFlow Buffer of type CV_32FC2, containing flow vectors in floating point representation, each flow vector for 1 pixel per gridSize, in the pitch-linear layout.
*/
CV_WRAP virtual void convertToFloat(InputArray flow, InputOutputArray floatFlow) = 0;
/** @brief Instantiate NVIDIA Optical Flow
@param imageSize Size of input image in pixels.
@param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
Defaults to NV_OF_PERF_LEVEL_SLOW.
@param outputGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about output grid sizes.
Defaults to NV_OF_OUTPUT_VECTOR_GRID_SIZE_1.
@param hintGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about hint grid sizes.
Defaults to NV_OF_HINT_VECTOR_GRID_SIZE_1.
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
generated in previous call to calc() as internal hints for the current call to calc().
Useful when computing flow vectors between successive video frames. Defaults to false.
@param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false.
@param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false.
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
@param inputStream Optical flow algorithm may optionally involve cuda preprocessing on the input buffers.
The input cuda stream can be used to pipeline and synchronize the cuda preprocessing tasks with OF HW engine.
If input stream is not set, the execute function will use default stream which is NULL stream;
@param outputStream Optical flow algorithm may optionally involve cuda post processing on the output flow vectors.
The output cuda stream can be used to pipeline and synchronize the cuda post processing tasks with OF HW engine.
If output stream is not set, the execute function will use default stream which is NULL stream;
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_2_0> create(
cv::Size imageSize,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_PERF_LEVEL_SLOW,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_1,
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,
int gpuId = 0,
Stream& inputStream = Stream::Null(),
Stream& outputStream = Stream::Null());
/** @brief Instantiate NVIDIA Optical Flow with ROI Feature
@param imageSize Size of input image in pixels.
@param roiData Pointer to ROI data.
@param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
Defaults to NV_OF_PERF_LEVEL_SLOW.
@param outputGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about output grid sizes.
Defaults to NV_OF_OUTPUT_VECTOR_GRID_SIZE_1.
@param hintGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about hint grid sizes.
Defaults to NV_OF_HINT_VECTOR_GRID_SIZE_1.
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
generated in previous call to calc() as internal hints for the current call to calc().
Useful when computing flow vectors between successive video frames. Defaults to false.
@param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false.
@param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false.
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
@param inputStream Optical flow algorithm may optionally involve cuda preprocessing on the input buffers.
The input cuda stream can be used to pipeline and synchronize the cuda preprocessing tasks with OF HW engine.
If input stream is not set, the execute function will use default stream which is NULL stream;
@param outputStream Optical flow algorithm may optionally involve cuda post processing on the output flow vectors.
The output cuda stream can be used to pipeline and synchronize the cuda post processing tasks with OF HW engine.
If output stream is not set, the execute function will use default stream which is NULL stream;
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_2_0> create(
cv::Size imageSize,
std::vector<Rect> roiData,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_PERF_LEVEL_SLOW,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_1,
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,

@ -0,0 +1,36 @@
import os
import cv2 as cv
import numpy as np
from tests_common import NewOpenCVTests, unittest
class nvidiaopticalflow_test(NewOpenCVTests):
def setUp(self):
super(nvidiaopticalflow_test, self).setUp()
if not cv.cuda.getCudaEnabledDeviceCount():
self.skipTest("No CUDA-capable device is detected")
@unittest.skipIf('OPENCV_TEST_DATA_PATH' not in os.environ,
"OPENCV_TEST_DATA_PATH is not defined")
def test_calc(self):
frame1 = os.environ['OPENCV_TEST_DATA_PATH'] + '/gpu/opticalflow/frame0.png'
frame2 = os.environ['OPENCV_TEST_DATA_PATH'] + '/gpu/opticalflow/frame1.png'
npMat1 = cv.cvtColor(cv.imread(frame1),cv.COLOR_BGR2GRAY)
npMat2 = cv.cvtColor(cv.imread(frame2),cv.COLOR_BGR2GRAY)
cuMat1 = cv.cuda_GpuMat(npMat1)
cuMat2 = cv.cuda_GpuMat(npMat2)
try:
nvof = cv.cuda_NvidiaOpticalFlow_1_0.create(cuMat1.shape[1], cuMat1.shape[0], 5, False, False, False, 0)
flow = nvof.calc(cuMat1, cuMat2, None)
self.assertTrue(flow.shape[1] > 0 and flow.shape[0] > 0)
flowUpSampled = nvof.upSampler(flow[0], cuMat1.shape[1], cuMat1.shape[0], nvof.getGridSize(), None)
nvof.collectGarbage()
except cv.error as e:
if e.code == cv.Error.StsBadFunc or e.code == cv.Error.StsBadArg or e.code == cv.Error.StsNullPtr:
self.skipTest("Algorithm is not supported in the current environment")
self.assertTrue(flowUpSampled.shape[1] > 0 and flowUpSampled.shape[0] > 0)
if __name__ == '__main__':
NewOpenCVTests.bootstrap()

@ -339,13 +339,8 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
const int width = frame0.size().width;
const int height = frame0.size().height;
const bool enableTemporalHints = false;
const bool enableExternalHints = false;
const bool enableCostBuffer = false;
const int gpuid = 0;
Stream inputStream;
Stream outputStream;
if (PERF_RUN_CUDA())
{
@ -355,9 +350,9 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
false, false, false, 0, inputStream, outputStream);
}
catch (const cv::Exception& e)
{
@ -376,6 +371,63 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
CUDA_SANITY_CHECK(u, 1e-10);
CUDA_SANITY_CHECK(v, 1e-10);
d_nvof->collectGarbage();
}
}
//////////////////////////////////////////////////////
// NvidiaOpticalFlow_2_0
PERF_TEST_P(ImagePair, NvidiaOpticalFlow_2_0,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
declare.time(10);
const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
const cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1;
const cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_1;
Stream inputStream;
Stream outputStream;
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat d_flow;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, outGridSize, hintGridSize,
false, false, false, 0, inputStream, outputStream);
}
catch (const cv::Exception& e)
{
if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
throw SkipTestException("Current configuration is not supported");
throw;
}
TEST_CYCLE() d_nvof->calc(d_frame0, d_frame1, d_flow);
cv::cuda::GpuMat flow[2];
cv::cuda::split(d_flow, flow);
cv::cuda::GpuMat u = flow[0];
cv::cuda::GpuMat v = flow[1];
CUDA_SANITY_CHECK(u, 1e-10);
CUDA_SANITY_CHECK(v, 1e-10);
d_nvof->collectGarbage();
}
}

@ -2,6 +2,7 @@
#include <iostream>
#include <fstream>
#include <iomanip>
#include <iterator>
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
@ -11,7 +12,6 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/video/tracking.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
@ -131,12 +131,88 @@ static void drawOpticalFlow(const Mat_<float>& flowx, const Mat_<float>& flowy
}
}
/*
ROI config file format.
numrois 3
roi0 640 96 1152 192
roi1 640 64 896 864
roi2 640 960 256 32
*/
bool parseROI(std::string ROIFileName, std::vector<Rect>& roiData)
{
std::string str;
uint32_t nRois = 0;
std::ifstream hRoiFile;
hRoiFile.open(ROIFileName, std::ios::in);
if (hRoiFile.is_open())
{
while (std::getline(hRoiFile, str))
{
std::istringstream iss(str);
std::vector<std::string> tokens{ std::istream_iterator<std::string>{iss},
std::istream_iterator<std::string>{} };
if (tokens.size() == 0) continue; // if empty line, coninue
transform(tokens[0].begin(), tokens[0].end(), tokens[0].begin(), ::tolower);
if (tokens[0] == "numrois")
{
nRois = atoi(tokens[1].data());
}
else if (tokens[0].rfind("roi", 0) == 0)
{
cv::Rect roi;
roi.x = atoi(tokens[1].data());
roi.y = atoi(tokens[2].data());
roi.width = atoi(tokens[3].data());
roi.height = atoi(tokens[4].data());
roiData.push_back(roi);
}
else if (tokens[0].rfind("#", 0) == 0)
{
continue;
}
else
{
std::cout << "Unidentified keyword in roi config file " << tokens[0] << std::endl;
hRoiFile.close();
return false;
}
}
}
else
{
std::cout << "Unable to open ROI file " << std::endl;
return false;
}
if (nRois != roiData.size())
{
std::cout << "NumRois(" << nRois << ")and specified roi rects (" << roiData.size() << ")are not matching " << std::endl;
hRoiFile.close();
return false;
}
hRoiFile.close();
return true;
}
int main(int argc, char **argv)
{
std::unordered_map<std::string, NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL> presetMap = {
{ "slow", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW },
{ "medium", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_MEDIUM },
{ "fast", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST } };
std::unordered_map<std::string, NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL> presetMap = {
{ "slow", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW },
{ "medium", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_MEDIUM },
{ "fast", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST } };
std::unordered_map<int, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE> outputGridSize = {
{ 1, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 },
{ 2, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 },
{ 4, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_4 } };
std::unordered_map<int, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE> hintGridSize = {
{ 1, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_1 },
{ 2, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_2 },
{ 4, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_4 },
{ 8, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_8 } };
try
{
@ -145,7 +221,10 @@ int main(int argc, char **argv)
"{ r right | ../data/basketball2.png | specify right image }"
"{ g gpuid | 0 | cuda device index}"
"{ p preset | slow | perf preset for OF algo [ options : slow, medium, fast ]}"
"{ og outputGridSize | 1 | Output grid size of OF vector [ options : 1, 2, 4 ]}"
"{ hg hintGridSize | 1 | Hint grid size of OF vector [ options : 1, 2, 4, 8 ]}"
"{ o output | OpenCVNvOF.flo | output flow vector file in middlebury format}"
"{ rc roiConfigFile | | Region of Interest config file }"
"{ th enableTemporalHints | false | Enable temporal hints}"
"{ eh enableExternalHints | false | Enable external hints}"
"{ cb enableCostBuffer | false | Enable output cost buffer}"
@ -159,60 +238,93 @@ int main(int argc, char **argv)
return 0;
}
string pathL = cmd.get<string>("left");
string pathR = cmd.get<string>("right");
string preset = cmd.get<string>("preset");
string output = cmd.get<string>("output");
std::string pathL = cmd.get<std::string>("left");
std::string pathR = cmd.get<std::string>("right");
std::string preset = cmd.get<std::string>("preset");
std::string output = cmd.get<std::string>("output");
std::string roiConfiFile = cmd.get<std::string>("roiConfigFile");
bool enableExternalHints = cmd.get<bool>("enableExternalHints");
bool enableTemporalHints = cmd.get<bool>("enableTemporalHints");
bool enableCostBuffer = cmd.get<bool>("enableCostBuffer");
int gpuId = cmd.get<int>("gpuid");
int outputBufferGridSize = cmd.get<int>("outputGridSize");
int hintBufferGridSize = cmd.get<int>("hintGridSize");
if (pathL.empty()) cout << "Specify left image path\n";
if (pathR.empty()) cout << "Specify right image path\n";
if (preset.empty()) cout << "Specify perf preset for OpticalFlow algo\n";
if (pathL.empty()) std::cout << "Specify left image path" << std::endl;
if (pathR.empty()) std::cout << "Specify right image path" << std::endl;
if (preset.empty()) std::cout << "Specify perf preset for OpticalFlow algo" << std::endl;
if (pathL.empty() || pathR.empty()) return 0;
auto search = presetMap.find(preset);
if (search == presetMap.end())
auto p = presetMap.find(preset);
if (p == presetMap.end())
{
std::cout << "Invalid preset level : " << preset << std::endl;
return 0;
}
NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset = search->second;
NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset = p->second;
auto o = outputGridSize.find(outputBufferGridSize);
if (o == outputGridSize.end())
{
std::cout << "Invalid output grid size: " << outputBufferGridSize << std::endl;
return 0;
}
NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outBufGridSize = o->second;
NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintBufGridSize =
NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED;
if (enableExternalHints)
{
auto h = hintGridSize.find(hintBufferGridSize);
if (h == hintGridSize.end())
{
std::cout << "Invalid hint grid size: " << hintBufferGridSize << std::endl;
return 0;
}
hintBufGridSize = h->second;
}
std::vector<Rect> roiData;
if (!roiConfiFile.empty())
{
if (!parseROI(roiConfiFile, roiData))
{
std::cout << "Wrong Region of Interest config file, proceeding without ROI" << std::endl;
}
}
Mat frameL = imread(pathL, IMREAD_GRAYSCALE);
Mat frameR = imread(pathR, IMREAD_GRAYSCALE);
if (frameL.empty()) cout << "Can't open '" << pathL << "'\n";
if (frameR.empty()) cout << "Can't open '" << pathR << "'\n";
if (frameL.empty()) std::cout << "Can't open '" << pathL << "'" << std::endl;
if (frameR.empty()) std::cout << "Can't open '" << pathR << "'" << std::endl;
if (frameL.empty() || frameR.empty()) return -1;
Ptr<NvidiaOpticalFlow_1_0> nvof = NvidiaOpticalFlow_1_0::create(
frameL.size().width, frameL.size().height, perfPreset,
Ptr<NvidiaOpticalFlow_2_0> nvof = NvidiaOpticalFlow_2_0::create(
frameL.size(), roiData, perfPreset, outBufGridSize, hintBufGridSize,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuId);
Mat flowx, flowy, flowxy, upsampledFlowXY, image;
Mat flowx, flowy, flowxy, floatFlow, image;
nvof->calc(frameL, frameR, flowxy);
nvof->upSampler(flowxy, frameL.size().width, frameL.size().height,
nvof->getGridSize(), upsampledFlowXY);
nvof->convertToFloat(flowxy, floatFlow);
if (output.size() != 0)
if (!output.empty())
{
if (!writeOpticalFlow(output, upsampledFlowXY))
cout << "Failed to save Flow Vector" << endl;
if (!writeOpticalFlow(output, floatFlow))
std::cout << "Failed to save Flow Vector" << std::endl;
else
cout << "Flow vector saved as '" << output << "'\n";
std::cout << "Flow vector saved as '" << output << "'" << std::endl;
}
Mat planes[] = { flowx, flowy };
split(upsampledFlowXY, planes);
split(floatFlow, planes);
flowx = planes[0]; flowy = planes[1];
drawOpticalFlow(flowx, flowy, image, 10);
imshow("Colorize image",image);
imshow("Colorize image", image);
waitKey(0);
nvof->collectGarbage();
}

@ -183,8 +183,11 @@ int main(int argc, const char* argv[])
Ptr<cuda::DensePyrLKOpticalFlow> lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7));
Ptr<cuda::FarnebackOpticalFlow> farn = cuda::FarnebackOpticalFlow::create();
Ptr<cuda::OpticalFlowDual_TVL1> tvl1 = cuda::OpticalFlowDual_TVL1::create();
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof = cuda::NvidiaOpticalFlow_1_0::create(frame0.size().width, frame0.size().height,
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof_1_0 = cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, false, false, false, 0, inputStream, outputStream);
Ptr<cuda::NvidiaOpticalFlow_2_0> nvof_2_0 = cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1,
NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED, false, false, false, 0, inputStream, outputStream);
{
GpuMat d_frame0f;
@ -242,16 +245,32 @@ int main(int argc, const char* argv[])
//Hence it is expected to be more than what is displayed in the NVIDIA Optical Flow SDK documentation.
const int64 start = getTickCount();
nvof->calc(d_frame0, d_frame1, d_flowxy);
nvof_1_0->calc(d_frame0, d_frame1, d_flowxy);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "NVIDIAOpticalFlow : " << timeSec << " sec" << endl;
cout << "NVIDIAOpticalFlow_1_0 : " << timeSec << " sec" << endl;
nvof->upSampler(d_flowxy, frame0.size().width, frame0.size().height,
nvof->getGridSize(), d_flow);
nvof_1_0->upSampler(d_flowxy, frame0.size(), nvof_1_0->getGridSize(), d_flow);
showFlow("NVIDIAOpticalFlow", d_flow);
nvof->collectGarbage();
showFlow("NVIDIAOpticalFlow_1_0", d_flow);
nvof_1_0->collectGarbage();
}
{
//The timing displayed below includes the time taken to copy the input buffers to the OF CUDA input buffers
//and to copy the output buffers from the OF CUDA output buffer to the output buffer.
//Hence it is expected to be more than what is displayed in the NVIDIA Optical Flow SDK documentation.
const int64 start = getTickCount();
nvof_2_0->calc(d_frame0, d_frame1, d_flowxy);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "NVIDIAOpticalFlow_2_0 : " << timeSec << " sec" << endl;
nvof_2_0->convertToFloat(d_flowxy, d_flow);
showFlow("NVIDIAOpticalFlow_2_0", d_flow);
nvof_2_0->collectGarbage();
}
imshow("Frame 0", frame0);

@ -0,0 +1,99 @@
//
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
//M*/
#if !defined CUDA_DISABLER
#include <cuda_runtime.h>
#include <stdio.h>
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed short int16_t;
typedef signed int int32_t;
#define BLOCKDIM_X 32
#define BLOCKDIM_Y 16
// data required to do 2x upsampling. Same can be used for 4x upsampling also
#define SMEM_COLS ((BLOCKDIM_X)/2)
#define SMEM_ROWS ((BLOCKDIM_Y)/2)
namespace cv { namespace cuda { namespace device { namespace optflow_nvidia
{
static const char *_cudaGetErrorEnum(cudaError_t error) { return cudaGetErrorName(error); }
template <typename T>
void check(T result, char const *const func, const char *const file,
int const line) {
if (result) {
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
static_cast<uint32_t>(result), _cudaGetErrorEnum(result), func);
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
template <class T>
static __device__ void ReadDevPtrData(void* devptr, uint32_t x0, uint32_t y0, uint32_t src_w, uint32_t src_h, uint32_t src_pitch,
T src[][SMEM_COLS], uint32_t i, uint32_t j)
{
uint32_t shift = (sizeof(T) == sizeof(int32_t)) ? 2 : 1;
src[j][i] = *(T*)((uint8_t*)devptr + y0 * src_pitch + (x0 << shift));
}
extern "C"
__global__ void NearestNeighborFlowKernel(cudaSurfaceObject_t srcSurf, void* srcDevPtr, uint32_t src_w, uint32_t src_pitch, uint32_t src_h,
cudaSurfaceObject_t dstSurf, void* dstDevPtr, uint32_t dst_w, uint32_t dst_pitch, uint32_t dst_h,
uint32_t nScaleFactor)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
int x0 = x / nScaleFactor;
int y0 = y / nScaleFactor;
__shared__ short2 src[SMEM_ROWS][SMEM_COLS];
int i = threadIdx.x / nScaleFactor;
int j = threadIdx.y / nScaleFactor;
if ((x % nScaleFactor == 0) && (y % nScaleFactor == 0))
{
ReadDevPtrData<short2>(srcDevPtr, x0, y0, src_w, src_h, src_pitch, src, i, j);
}
__syncthreads();
if (x < dst_w && y < dst_h)
{
if (dstDevPtr == NULL)
{
surf2Dwrite<short2>(src[j][i], dstSurf, x * sizeof(short2), y, cudaBoundaryModeClamp);
}
else
{
*(short2*)((uint8_t*)dstDevPtr + y * dst_pitch + (x << 2)) = src[j][i];
}
}
}
void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint32_t nSrcHeight,
void* dstDevPtr, uint32_t nDstWidth, uint32_t nDstPitch, uint32_t nDstHeight,
uint32_t nScaleFactor)
{
dim3 blockDim(BLOCKDIM_X, BLOCKDIM_Y);
dim3 gridDim((nDstWidth + blockDim.x - 1) / blockDim.x, (nDstHeight + blockDim.y - 1) / blockDim.y);
NearestNeighborFlowKernel << <gridDim, blockDim >> > (0, srcDevPtr, nSrcWidth, nSrcPitch, nSrcHeight,
0, dstDevPtr, nDstWidth, nDstPitch, nDstHeight,
nScaleFactor);
checkCudaErrors(cudaGetLastError());
}}}}}
#endif

@ -8,11 +8,27 @@
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) { throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); }
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create
(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) {
throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); }
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
int, int, NVIDIA_OF_PERF_LEVEL, NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE,
bool, int, cv::Rect*, bool, bool, bool, int, Stream&, Stream&) {
throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0>();
}
#elif !defined HAVE_NVIDIA_OPTFLOW
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&)
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&)
{
CV_Error(cv::Error::HeaderIsNull, "OpenCV was build without NVIDIA OpticalFlow support");
}
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
int, int, NVIDIA_OF_PERF_LEVEL, NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE,
bool, int, cv::Rect*, bool, bool, bool, int, , Stream&, Stream&)
{
CV_Error(cv::Error::HeaderIsNull, "OpenCV was build without NVIDIA OpticalFlow support");
}
@ -22,6 +38,13 @@ cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create
#include "nvOpticalFlowCommon.h"
#include "nvOpticalFlowCuda.h"
namespace cv { namespace cuda { namespace device { namespace optflow_nvidia
{
void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint32_t nSrcHeight,
void* dstDevPtr, uint32_t nDstWidth, uint32_t nDstPitch, uint32_t nDstHeight,
uint32_t nScaleFactor);
}}}}
#if defined(_WIN32) || defined(_WIN64)
#include <Windows.h>
#else
@ -123,7 +146,7 @@ class LoadNvidiaModules
private:
typedef int(*PFNCudaCuCtxGetCurrent)(CUcontext*);
typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda)
(uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
(int apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
PFNCudaCuCtxGetCurrent m_cudaDriverAPIGetCurrentCtx;
PFNNvOFAPICreateInstanceCuda m_NvOFAPICreateInstanceCuda;
@ -249,9 +272,9 @@ private:
NV_OF_BUFFER_DESCRIPTOR m_hintBufferDesc;
NV_OF_BUFFER_DESCRIPTOR m_costBufferDesc;
uint32_t m_outputElementSize;
uint32_t m_costBufElementSize;
uint32_t m_hintBufElementSize;
int m_outputElementSize;
int m_costBufElementSize;
int m_hintBufElementSize;
NV_OF_INIT_PARAMS m_initParams;
@ -288,7 +311,7 @@ protected:
std::mutex m_lock;
public:
NvidiaOpticalFlowImpl(int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
NvidiaOpticalFlowImpl(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream);
virtual void calc(InputArray inputImage, InputArray referenceImage,
@ -298,17 +321,17 @@ public:
virtual void collectGarbage();
virtual void upSampler(InputArray flow, int width, int height,
virtual void upSampler(InputArray flow, cv::Size imageSize,
int gridSize, InputOutputArray upsampledFlow);
virtual int getGridSize() const { return m_gridSize; }
};
NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId,
Stream inputStream, Stream outputStream) :
m_width(width), m_height(height), m_preset(perfPreset),
m_width(imageSize.width), m_height(imageSize.height), m_preset(perfPreset),
m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints),
m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints),
m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId),
@ -371,7 +394,7 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
m_costBufferDesc.height = nOutHeight;
m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT;
m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST;
m_costBufElementSize = sizeof(uint32_t);
m_costBufElementSize = sizeof(int);
}
m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST());
@ -600,7 +623,7 @@ void NvidiaOpticalFlowImpl::collectGarbage()
}
}
void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, cv::Size imageSize,
int gridSize, InputOutputArray upsampledFlow)
{
Mat flow;
@ -622,26 +645,26 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
std::unique_ptr<float[]> flowVectors = nullptr;
const NV_OF_FLOW_VECTOR* _flowVectors = static_cast<const NV_OF_FLOW_VECTOR*>((const void*)flow.data);
flowVectors.reset(new float[2 * width * height]);
for (int y = 0; y < height; ++y)
flowVectors.reset(new float[2 * imageSize.width * imageSize.height]);
for (int y = 0; y < imageSize.height; ++y)
{
for (int x = 0; x < width; ++x)
for (int x = 0; x < imageSize.width; ++x)
{
uint32_t blockIdX = x / gridSize;
uint32_t blockIdY = y / gridSize;
uint32_t widthInBlocks = ((width + gridSize - 1) / gridSize);
uint32_t heightInBlocks = ((height + gridSize - 1) / gridSize);;
int blockIdX = x / gridSize;
int blockIdY = y / gridSize;
int widthInBlocks = ((imageSize.width + gridSize - 1) / gridSize);
int heightInBlocks = ((imageSize.height + gridSize - 1) / gridSize);;
if ((blockIdX < widthInBlocks) && (blockIdY < heightInBlocks))
{
flowVectors[(y * 2 * width) + 2 * x] = (float)
flowVectors[(y * 2 * imageSize.width) + 2 * x] = (float)
(_flowVectors[blockIdX + (blockIdY * widthInBlocks)].flowx / (float)(1 << 5));
flowVectors[(y * 2 * width) + 2 * x + 1] = (float)
flowVectors[(y * 2 * imageSize.width) + 2 * x + 1] = (float)
(_flowVectors[blockIdX + (blockIdY * widthInBlocks)].flowy / (float)(1 << 5));
}
}
}
Mat output(Size(width, height), CV_32FC2, flowVectors.get());
Mat output(Size(imageSize.width, imageSize.height), CV_32FC2, flowVectors.get());
if (upsampledFlow.isMat())
{
output.copyTo(upsampledFlow);
@ -656,18 +679,603 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
CV_Error(Error::StsBadArg,
"Incorrect flow buffer passed for upsampled flow. Pass either Mat or GpuMat");
}
}
class NvidiaOpticalFlowImpl_2 : public cv::cuda::NvidiaOpticalFlow_2_0
{
private:
int m_width;
int m_height;
NV_OF_PERF_LEVEL m_preset;
NV_OF_OUTPUT_VECTOR_GRID_SIZE m_gridSize;
NV_OF_HINT_VECTOR_GRID_SIZE m_hintGridSize;
bool m_enableROI;
std::vector<Rect> m_roiDataRect;
NV_OF_ROI_RECT* m_roiData;
bool m_enableTemporalHints;
bool m_enableExternalHints;
bool m_enableCostBuffer;
int m_gpuId;
Stream m_inputStream;
Stream m_outputStream;
CUcontext m_cuContext;
int m_scaleFactor;
NV_OF_BUFFER_FORMAT m_format;
NV_OF_OUTPUT_VECTOR_GRID_SIZE m_hwGridSize;
NV_OF_BUFFER_DESCRIPTOR m_inputBufferDesc;
NV_OF_BUFFER_DESCRIPTOR m_outputBufferDesc;
NV_OF_BUFFER_DESCRIPTOR m_hintBufferDesc;
NV_OF_BUFFER_DESCRIPTOR m_costBufferDesc;
int m_outputElementSize;
int m_costBufElementSize;
int m_hintBufElementSize;
NV_OF_INIT_PARAMS m_initParams;
std::unique_ptr<NV_OF_CUDA_API_FUNCTION_LIST> m_ofAPI;
NvOFHandle m_hOF; //nvof handle
NvOFGPUBufferHandle m_hInputBuffer;
NvOFGPUBufferHandle m_hReferenceBuffer;
NvOFGPUBufferHandle m_hOutputBuffer;
NvOFGPUBufferHandle m_hOutputUpScaledBuffer;
NvOFGPUBufferHandle m_hHintBuffer;
NvOFGPUBufferHandle m_hCostBuffer;
CUdeviceptr m_frame0cuDevPtr;
CUdeviceptr m_frame1cuDevPtr;
CUdeviceptr m_flowXYcuDevPtr;
CUdeviceptr m_flowXYUpScaledcuDevPtr;
CUdeviceptr m_hintcuDevPtr;
CUdeviceptr m_costcuDevPtr;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_inputBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_referenceBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputUpScaledBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_hintBufferStrideInfo;
NV_OF_CUDA_BUFFER_STRIDE_INFO m_costBufferStrideInfo;
NV_OF_CUDA_API_FUNCTION_LIST* GetAPI()
{
std::lock_guard<std::mutex> lock(m_lock);
return m_ofAPI.get();
}
NvOFHandle GetHandle() { return m_hOF; }
protected:
std::mutex m_lock;
public:
NvidiaOpticalFlowImpl_2(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset,
NV_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NV_OF_HINT_VECTOR_GRID_SIZE hintGridSize,
bool bEnableROI, std::vector<Rect> roiData, bool bEnableTemporalHints,
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream);
virtual void calc(InputArray inputImage, InputArray referenceImage,
InputOutputArray flow, Stream& stream = Stream::Null(),
InputArray hint = cv::noArray(), OutputArray cost = cv::noArray());
virtual void collectGarbage();
virtual void convertToFloat(InputArray flow, InputOutputArray floatFlow);
virtual int getGridSize() const { return m_gridSize; }
};
NvidiaOpticalFlowImpl_2::NvidiaOpticalFlowImpl_2(
cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset,
NV_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NV_OF_HINT_VECTOR_GRID_SIZE hintGridSize,
bool bEnableROI, std::vector<Rect> roiData, bool bEnableTemporalHints,
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream) :
m_width(imageSize.width), m_height(imageSize.height), m_preset(perfPreset),
m_gridSize(outputGridSize), m_hintGridSize(hintGridSize),
m_enableROI(bEnableROI), m_roiDataRect(roiData),
m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints),
m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints),
m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId),
m_inputStream(inputStream), m_outputStream(outputStream),
m_cuContext(nullptr), m_scaleFactor(1), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
m_hwGridSize((NV_OF_OUTPUT_VECTOR_GRID_SIZE)0)
{
LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init();
int nGpu = 0;
cuSafeCall(cudaGetDeviceCount(&nGpu));
if (m_gpuId < 0 || m_gpuId >= nGpu)
{
CV_Error(Error::StsBadArg, "Invalid GPU Ordinal");
}
cuSafeCall(cudaSetDevice(m_gpuId));
cuSafeCall(cudaFree(m_cuContext));
cuSafeCall(LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr()(&m_cuContext));
if (m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 &&
m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 &&
m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
{
CV_Error(Error::StsBadArg, "Unsupported output grid size");
}
if (m_enableExternalHints)
{
if (m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_1 &&
m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_2 &&
m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_4 &&
m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_8)
{
CV_Error(Error::StsBadArg, "Unsupported hint grid size");
}
}
m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST());
NVOF_API_CALL(LoadNvidiaModulesObj.GetOFLibraryFunctionPtr()(NV_OF_API_VERSION, m_ofAPI.get()));
NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF));
m_roiData = (NV_OF_ROI_RECT*)m_roiDataRect.data();
uint32_t size = 0;
if (m_enableROI)
{
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORT_ROI, nullptr, &size));
std::unique_ptr<uint32_t[]> val1(new uint32_t[size]);
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORT_ROI, val1.get(), &size));
if (val1[0] != NV_OF_TRUE)
{
m_enableROI = false;
m_roiData = nullptr;
CV_Error(Error::StsBadFunc, "ROI not supported on this GPU");
}
}
size = 0;
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, nullptr, &size));
std::unique_ptr<uint32_t[]> val2(new uint32_t[size]);
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, val2.get(), &size));
for (uint32_t i = 0; i < size; i++)
{
if (m_gridSize != val2[i])
{
size = 0;
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, nullptr, &size));
std::unique_ptr<uint32_t[]> val3(new uint32_t[size]);
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, val3.get(), &size));
m_hwGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX;
for (uint32_t i = 0; i < size; i++)
{
if (m_gridSize == val3[i])
{
m_hwGridSize = m_gridSize;
break;
}
if (m_gridSize < val3[i] && val3[i] < m_hwGridSize)
{
m_hwGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)val3[i];
}
}
if (m_hwGridSize >= (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX)
{
CV_Error(Error::StsBadArg, "Invalid Grid Size");
}
else
{
m_scaleFactor = m_hwGridSize / m_gridSize;
}
}
else
{
m_hwGridSize = m_gridSize;
}
}
auto nOutWidth = (m_width + m_hwGridSize - 1) / m_hwGridSize;
auto nOutHeight = (m_height + m_hwGridSize - 1) / m_hwGridSize;
auto outBufFmt = NV_OF_BUFFER_FORMAT_SHORT2;
memset(&m_inputBufferDesc, 0, sizeof(m_inputBufferDesc));
m_inputBufferDesc.width = m_width;
m_inputBufferDesc.height = m_height;
m_inputBufferDesc.bufferFormat = m_format;
m_inputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT;
memset(&m_outputBufferDesc, 0, sizeof(m_outputBufferDesc));
m_outputBufferDesc.width = nOutWidth;
m_outputBufferDesc.height = nOutHeight;
m_outputBufferDesc.bufferFormat = outBufFmt;
m_outputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT;
m_outputElementSize = sizeof(NV_OF_FLOW_VECTOR);
if (m_enableExternalHints)
{
memset(&m_hintBufferDesc, 0, sizeof(m_hintBufferDesc));
m_hintBufferDesc.width = nOutWidth;
m_hintBufferDesc.height = nOutHeight;
m_hintBufferDesc.bufferFormat = outBufFmt;
m_hintBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_HINT;
m_hintBufElementSize = m_outputElementSize;
}
if (m_enableCostBuffer)
{
memset(&m_costBufferDesc, 0, sizeof(m_costBufferDesc));
m_costBufferDesc.width = nOutWidth;
m_costBufferDesc.height = nOutHeight;
m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT8;
m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST;
m_costBufElementSize = sizeof(int);
}
memset(&m_initParams, 0, sizeof(m_initParams));
m_initParams.width = m_inputBufferDesc.width;
m_initParams.height = m_inputBufferDesc.height;
m_initParams.enableExternalHints = (NV_OF_BOOL)m_enableExternalHints;
m_initParams.enableOutputCost = (NV_OF_BOOL)m_enableCostBuffer;
m_initParams.hintGridSize = (NV_OF_BOOL)m_enableExternalHints == NV_OF_TRUE ?
m_hintGridSize : (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED;
m_initParams.outGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)m_hwGridSize;
m_initParams.mode = NV_OF_MODE_OPTICALFLOW;
m_initParams.perfLevel = m_preset;
m_initParams.enableRoi = (NV_OF_BOOL)m_enableROI;
NVOF_API_CALL(GetAPI()->nvOFInit(GetHandle(), &m_initParams));
if (m_inputStream || m_outputStream)
{
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream)));
}
//Input Buffer 1
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
&m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hInputBuffer));
m_frame0cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hInputBuffer);
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
m_hInputBuffer, &m_inputBufferStrideInfo));
//Input Buffer 2
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
&m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hReferenceBuffer));
m_frame1cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hReferenceBuffer);
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
m_hReferenceBuffer, &m_referenceBufferStrideInfo));
//Output Buffer
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
&m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputBuffer));
m_flowXYcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputBuffer);
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
m_hOutputBuffer, &m_outputBufferStrideInfo));
if (m_scaleFactor > 1)
{
m_outputBufferDesc.width = (m_width + m_gridSize - 1) / m_gridSize;;
m_outputBufferDesc.height = (m_height + m_gridSize - 1) / m_gridSize;;
//Output UpScaled Buffer
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
&m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputUpScaledBuffer));
m_flowXYUpScaledcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputUpScaledBuffer);
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
m_hOutputUpScaledBuffer, &m_outputUpScaledBufferStrideInfo));
}
//Hint Buffer
if (m_enableExternalHints)
{
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
&m_hintBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hHintBuffer));
m_hintcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hHintBuffer);
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
m_hHintBuffer, &m_hintBufferStrideInfo));
}
//Cost Buffer
if (m_enableCostBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
&m_costBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hCostBuffer));
m_costcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hCostBuffer);
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
m_hCostBuffer, &m_costBufferStrideInfo));
}
}
void NvidiaOpticalFlowImpl_2::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow,
Stream& stream, InputArray hint, OutputArray cost)
{
CV_UNUSED(stream);
GpuMat frame0GpuMat(_frame0.size(), _frame0.type(), (void*)m_frame0cuDevPtr,
m_inputBufferStrideInfo.strideInfo[0].strideXInBytes);
GpuMat frame1GpuMat(_frame1.size(), _frame1.type(), (void*)m_frame1cuDevPtr,
m_referenceBufferStrideInfo.strideInfo[0].strideXInBytes);
GpuMat flowXYGpuMat(Size((m_width + m_hwGridSize - 1) / m_hwGridSize,
(m_height + m_hwGridSize - 1) / m_hwGridSize), CV_16SC2,
(void*)m_flowXYcuDevPtr, m_outputBufferStrideInfo.strideInfo[0].strideXInBytes);
GpuMat flowXYGpuMatUpScaled(Size((m_width + m_gridSize - 1) / m_gridSize,
(m_height + m_gridSize - 1) / m_gridSize), CV_16SC2,
(void*)m_flowXYUpScaledcuDevPtr, m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes);
//check whether frame0 is Mat or GpuMat
if (_frame0.isMat())
{
//Get Mats from InputArrays
Mat __frame0 = _frame0.getMat();
frame0GpuMat.upload(__frame0, m_inputStream);
}
else if (_frame0.isGpuMat())
{
//Get GpuMats from InputArrays
GpuMat __frame0 = _frame0.getGpuMat();
__frame0.copyTo(frame0GpuMat, m_inputStream);
}
else
{
CV_Error(Error::StsBadArg,
"Incorrect input. Pass input image (frame0) as Mat or GpuMat");
}
//check whether frame1 is Mat or GpuMat
if (_frame1.isMat())
{
//Get Mats from InputArrays
Mat __frame1 = _frame1.getMat();
frame1GpuMat.upload(__frame1, m_inputStream);
}
else if (_frame1.isGpuMat())
{
//Get GpuMats from InputArrays
GpuMat __frame1 = _frame1.getGpuMat();
__frame1.copyTo(frame1GpuMat, m_inputStream);
}
else
{
CV_Error(Error::StsBadArg,
"Incorrect input. Pass reference image (frame1) as Mat or GpuMat");
}
if (m_enableExternalHints)
{
GpuMat hintGpuMat(hint.size(), hint.type(), (void*)m_hintcuDevPtr,
m_hintBufferStrideInfo.strideInfo[0].strideXInBytes);
if (hint.isMat())
{
//Get Mat from InputArray hint
Mat _hint = hint.getMat();
hintGpuMat.upload(_hint, m_inputStream);
}
else if (hint.isGpuMat())
{
//Get GpuMat from InputArray hint
GpuMat _hint = hint.getGpuMat();
_hint.copyTo(hintGpuMat, m_inputStream);
}
else
{
CV_Error(Error::StsBadArg, "Incorrect hint buffer passed. Pass Mat or GpuMat");
}
}
//Execute Call
NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
NV_OF_EXECUTE_OUTPUT_PARAMS exeOutParams;
memset(&exeInParams, 0, sizeof(exeInParams));
exeInParams.inputFrame = m_hInputBuffer;
exeInParams.referenceFrame = m_hReferenceBuffer;
exeInParams.disableTemporalHints = (NV_OF_BOOL)m_enableTemporalHints == NV_OF_TRUE ?
NV_OF_FALSE : NV_OF_TRUE;
exeInParams.externalHints = m_initParams.enableExternalHints == NV_OF_TRUE ?
m_hHintBuffer : nullptr;
exeInParams.numRois = m_initParams.enableRoi == NV_OF_TRUE ? m_roiDataRect.size() : 0;
exeInParams.roiData = m_initParams.enableRoi == NV_OF_TRUE ? m_roiData : nullptr;
memset(&exeOutParams, 0, sizeof(exeOutParams));
exeOutParams.outputBuffer = m_hOutputBuffer;
exeOutParams.outputCostBuffer = m_initParams.enableOutputCost == NV_OF_TRUE ?
m_hCostBuffer : nullptr;
NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));
if (m_scaleFactor > 1)
{
uint32_t nSrcWidth = flowXYGpuMat.size().width;
uint32_t nSrcHeight = flowXYGpuMat.size().height;
uint32_t nSrcPitch = m_outputBufferStrideInfo.strideInfo[0].strideXInBytes;
uint32_t nDstWidth = flowXYGpuMatUpScaled.size().width;
uint32_t nDstHeight = flowXYGpuMatUpScaled.size().height;
uint32_t nDstPitch = m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes;
cv::cuda::device::optflow_nvidia::FlowUpsample((void*)m_flowXYcuDevPtr, nSrcWidth, nSrcPitch,
nSrcHeight, (void*)m_flowXYUpScaledcuDevPtr, nDstWidth, nDstPitch, nDstHeight, m_scaleFactor);
if (_flow.isMat())
flowXYGpuMatUpScaled.download(_flow, m_outputStream);
else if (_flow.isGpuMat())
flowXYGpuMatUpScaled.copyTo(_flow, m_outputStream);
else
CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat");
}
else
{
if (_flow.isMat())
flowXYGpuMat.download(_flow, m_outputStream);
else if (_flow.isGpuMat())
flowXYGpuMat.copyTo(_flow, m_outputStream);
else
CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat");
}
if (m_enableCostBuffer)
{
GpuMat costGpuMat(Size((m_width + m_hwGridSize - 1) / m_hwGridSize,
(m_height + m_hwGridSize - 1) / m_hwGridSize), CV_8SC1, (void*)m_costcuDevPtr,
m_costBufferStrideInfo.strideInfo[0].strideXInBytes);
if (cost.isMat())
costGpuMat.download(cost, m_outputStream);
else if (cost.isGpuMat())
costGpuMat.copyTo(cost, m_outputStream);
else
CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
}
m_outputStream.waitForCompletion();
}
void NvidiaOpticalFlowImpl_2::collectGarbage()
{
if (m_enableROI)
{
m_roiData = nullptr;
}
if (m_hInputBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hInputBuffer));
}
if (m_hReferenceBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hReferenceBuffer));
}
if (m_hOutputBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputBuffer));
}
if (m_scaleFactor > 1 && m_hOutputUpScaledBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputUpScaledBuffer));
}
if (m_enableExternalHints)
{
if (m_hHintBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hHintBuffer));
}
}
if (m_enableCostBuffer)
{
if (m_hCostBuffer)
{
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer));
}
}
if (m_inputStream)
{
m_inputStream.waitForCompletion();
}
if (m_outputStream)
{
m_outputStream.waitForCompletion();
}
if (m_hOF)
{
NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF));
}
}
void NvidiaOpticalFlowImpl_2::convertToFloat(InputArray _flow, InputOutputArray floatFlow)
{
Mat flow;
if (_flow.isMat())
{
Mat __flow = _flow.getMat();
__flow.copyTo(flow);
}
else if (_flow.isGpuMat())
{
GpuMat __flow = _flow.getGpuMat();
__flow.download(flow);
}
else
{
CV_Error(Error::StsBadArg,
"Incorrect flow buffer passed. Pass either Mat or GpuMat");
}
int width = flow.size().width;
int height = flow.size().height;
Mat output(Size(width, height), CV_32FC2);
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < (int)(width * sizeof(int16_t)); ++x)
{
output.at<float>(y, x) = (float)(flow.at<int16_t>(y, x) / (float)(1 << 5));
}
}
if (floatFlow.isMat())
{
output.copyTo(floatFlow);
}
else if (floatFlow.isGpuMat())
{
GpuMat _output(output);
_output.copyTo(floatFlow);
}
else
{
CV_Error(Error::StsBadArg,
"Incorrect flow buffer passed for upsampled flow. Pass either Mat or GpuMat");
}
}}
Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
int width, int height, NVIDIA_OF_PERF_LEVEL perfPreset,
cv::Size imageSize, NVIDIA_OF_PERF_LEVEL perfPreset,
bool bEnableTemporalHints, bool bEnableExternalHints,
bool bEnableCostBuffer, int gpuId,
Stream& inputStream, Stream& outputStream)
{
return makePtr<NvidiaOpticalFlowImpl>(
width,
height,
imageSize,
(NV_OF_PERF_LEVEL)perfPreset,
bEnableTemporalHints,
bEnableExternalHints,
bEnableCostBuffer,
gpuId,
inputStream,
outputStream);
}
Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
cv::Size imageSize, NVIDIA_OF_PERF_LEVEL perfPreset,
NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize,
bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer,
int gpuId, Stream& inputStream, Stream& outputStream)
{
std::vector<Rect> roi(0);
return makePtr<NvidiaOpticalFlowImpl_2>(
imageSize,
(NV_OF_PERF_LEVEL)perfPreset,
(NV_OF_OUTPUT_VECTOR_GRID_SIZE)outputGridSize,
(NV_OF_HINT_VECTOR_GRID_SIZE)hintGridSize,
false,
roi,
bEnableTemporalHints,
bEnableExternalHints,
bEnableCostBuffer,
gpuId,
inputStream,
outputStream);
}
Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
cv::Size imageSize, std::vector<Rect> roiData, NVIDIA_OF_PERF_LEVEL perfPreset,
NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize,
bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer,
int gpuId, Stream& inputStream, Stream& outputStream)
{
return makePtr<NvidiaOpticalFlowImpl_2>(
imageSize,
(NV_OF_PERF_LEVEL)perfPreset,
(NV_OF_OUTPUT_VECTOR_GRID_SIZE)outputGridSize,
(NV_OF_HINT_VECTOR_GRID_SIZE)hintGridSize,
true,
roiData,
bEnableTemporalHints,
bEnableExternalHints,
bEnableCostBuffer,

@ -495,19 +495,11 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
cv::Mat frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
const int width = frame0.size().width;
const int height = frame0.size().height;
const bool enableTemporalHints = false;
const bool enableExternalHints = false;
const bool enableCostBuffer = false;
const int gpuid = 0;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
@ -519,7 +511,7 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
Mat flow, upsampledFlow;
d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
d_nvof->upSampler(flow, width, height, gridSize, upsampledFlow);
d_nvof->upSampler(flow, frame0.size(), gridSize, upsampledFlow);
std::string fname(cvtest::TS::ptr()->get_data_path());
fname += "opticalflow/nvofGolden.flo";
@ -527,6 +519,7 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
ASSERT_FALSE(golden.empty());
EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
d_nvof->collectGarbage();
}
CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
@ -539,19 +532,11 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
cv::Mat r_frame0, r_frame1;
const int width = frame0.size().width;
const int height = frame0.size().height;
const bool enableTemporalHints = false;
const bool enableExternalHints = false;
const bool enableCostBuffer = false;
const int gpuid = 0;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
@ -569,9 +554,96 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
EXPECT_TRUE(cv::checkRange(flowx));
EXPECT_TRUE(cv::checkRange(flowy));
d_nvof->collectGarbage();
};
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_1_0, ALL_DEVICES);
//////////////////////////////////////////////////////
// NvidiaOpticalFlow_2_0
struct NvidiaOpticalFlow_2_0 : testing::TestWithParam<cv::cuda::DeviceInfo>
{
cv::cuda::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(NvidiaOpticalFlow_2_0, Regression)
{
cv::Mat frame0 = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
throw SkipTestException("Current configuration is not supported");
throw;
}
Mat flow, upsampledFlow;
d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
d_nvof->convertToFloat(flow, upsampledFlow);
std::string fname(cvtest::TS::ptr()->get_data_path());
fname += "opticalflow/nvofGolden_2.flo";
cv::Mat golden = cv::readOpticalFlow(fname.c_str());
ASSERT_FALSE(golden.empty());
EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
d_nvof->collectGarbage();
}
CUDA_TEST_P(NvidiaOpticalFlow_2_0, OpticalFlowNan)
{
cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
cv::Mat r_frame0, r_frame1;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
throw SkipTestException("Current configuration is not supported");
throw;
}
Mat flow, flowx, flowy;
d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
Mat planes[] = { flowx, flowy };
split(flow, planes);
flowx = planes[0]; flowy = planes[1];
EXPECT_TRUE(cv::checkRange(flowx));
EXPECT_TRUE(cv::checkRange(flowy));
d_nvof->collectGarbage();
};
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_2_0, ALL_DEVICES);
}} // namespace
#endif // HAVE_CUDA

Loading…
Cancel
Save