diff --git a/modules/cudaoptflow/CMakeLists.txt b/modules/cudaoptflow/CMakeLists.txt index e5b823ab4..7d2d3e74e 100644 --- a/modules/cudaoptflow/CMakeLists.txt +++ b/modules/cudaoptflow/CMakeLists.txt @@ -8,21 +8,21 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-d ocv_define_module(cudaoptflow opencv_video opencv_optflow opencv_cudaarithm opencv_cudawarping opencv_cudaimgproc OPTIONAL opencv_cudalegacy WRAP python) -set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT "79c6cee80a2df9a196f20afd6b598a9810964c32") -set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5 "ca5acedee6cb45d0ec610a6732de5c15") -set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_1_0_Headers") -ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}.zip" - HASH ${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5} +set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT "edb50da3cf849840d680249aa6dbef248ebce2ca") +set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_MD5 "a73cd48b18dcc0cc8933b30796074191") +set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_2_0_Headers") +ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT}.zip" + HASH ${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_MD5} URL "https://github.com/NVIDIA/NVIDIAOpticalFlowSDK/archive/" - DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}" - STATUS NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS + DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH}" + STATUS NVIDIA_OPTICAL_FLOW_2_0_HEADERS_DOWNLOAD_SUCCESS ID "NVIDIA_OPTICAL_FLOW" UNPACK RELATIVE_URL) -if(NOT NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS) - message(STATUS "Failed to download NVIDIA_Optical_Flow_1_0 Headers") +if(NOT NVIDIA_OPTICAL_FLOW_2_0_HEADERS_DOWNLOAD_SUCCESS) + message(STATUS "Failed to download NVIDIA_Optical_Flow_2_0 Headers") else() add_definitions(-DHAVE_NVIDIA_OPTFLOW=1) - ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}") + ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT}") endif() \ No newline at end of file diff --git a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp index 9fde39252..3221dbc7e 100644 --- a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp +++ b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp @@ -392,9 +392,9 @@ public: /** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 1.0. @note - A sample application demonstrating the use of NVIDIA Optical Flow can be found at -opencv_source_code/samples/gpu/nvidia_optical_flow.cpp +opencv_contrib_source_code/modules/cudaoptflow/samples/nvidia_optical_flow.cpp - An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at -opencv_source_code/samples/gpu/optical_flow.cpp +opencv_contrib_source_code/modules/cudaoptflow/samples/optical_flow.cpp */ class CV_EXPORTS_W NvidiaOpticalFlow_1_0 : public NvidiaHWOpticalFlow @@ -417,18 +417,16 @@ public: * using nearest neighbour upsampling method. @param flow Buffer of type CV_16FC2 containing flow vectors generated by calc(). - @param width Width of the input image in pixels for which these flow vectors were generated. - @param height Height of the input image in pixels for which these flow vectors were generated. 
+ @param imageSize Size of the input image in pixels for which these flow vectors were generated. @param gridSize Granularity of the optical flow vectors returned by calc() function. Can be queried using getGridSize(). @param upsampledFlow Buffer of type CV_32FC2, containing upsampled flow vectors, each flow vector for 1 pixel, in the pitch-linear layout. */ - CV_WRAP virtual void upSampler(InputArray flow, int width, int height, + CV_WRAP virtual void upSampler(InputArray flow, cv::Size imageSize, int gridSize, InputOutputArray upsampledFlow) = 0; /** @brief Instantiate NVIDIA Optical Flow - @param width Width of input image in pixels. - @param height Height of input image in pixels. + @param imageSize Size of input image in pixels. @param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets. Defaults to NV_OF_PERF_LEVEL_SLOW. @param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors @@ -445,10 +443,142 @@ public: If output stream is not set, the execute function will use default stream which is NULL stream; */ CV_WRAP static Ptr create( - int width, - int height, + cv::Size imageSize, cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset - = cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW, + = cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW, + bool enableTemporalHints = false, + bool enableExternalHints = false, + bool enableCostBuffer = false, + int gpuId = 0, + Stream& inputStream = Stream::Null(), + Stream& outputStream = Stream::Null()); +}; + +/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 2.0. +@note +- A sample application demonstrating the use of NVIDIA Optical Flow can be found at +opencv_contrib_source_code/modules/cudaoptflow/samples/nvidia_optical_flow.cpp +- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at +opencv_contrib_source_code/modules/cudaoptflow/samples/optical_flow.cpp +*/ + +class CV_EXPORTS_W NvidiaOpticalFlow_2_0 : public NvidiaHWOpticalFlow +{ +public: + /** + * Supported optical flow performance levels. + */ + enum NVIDIA_OF_PERF_LEVEL + { + NV_OF_PERF_LEVEL_UNDEFINED, + NV_OF_PERF_LEVEL_SLOW = 5, /**< Slow perf level results in lowest performance and best quality */ + NV_OF_PERF_LEVEL_MEDIUM = 10, /**< Medium perf level results in low performance and medium quality */ + NV_OF_PERF_LEVEL_FAST = 20, /**< Fast perf level results in high performance and low quality */ + NV_OF_PERF_LEVEL_MAX + }; + + /** + * Supported grid size for output buffer. + */ + enum NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE + { + NV_OF_OUTPUT_VECTOR_GRID_SIZE_UNDEFINED, + NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 = 1, /**< Output buffer grid size is 1x1 */ + NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 = 2, /**< Output buffer grid size is 2x2 */ + NV_OF_OUTPUT_VECTOR_GRID_SIZE_4 = 4, /**< Output buffer grid size is 4x4 */ + NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX + }; + + /** + * Supported grid size for hint buffer. 
+ */ + enum NVIDIA_OF_HINT_VECTOR_GRID_SIZE + { + NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED, + NV_OF_HINT_VECTOR_GRID_SIZE_1 = 1, /**< Hint buffer grid size is 1x1.*/ + NV_OF_HINT_VECTOR_GRID_SIZE_2 = 2, /**< Hint buffer grid size is 2x2.*/ + NV_OF_HINT_VECTOR_GRID_SIZE_4 = 4, /**< Hint buffer grid size is 4x4.*/ + NV_OF_HINT_VECTOR_GRID_SIZE_8 = 8, /**< Hint buffer grid size is 8x8.*/ + NV_OF_HINT_VECTOR_GRID_SIZE_MAX + }; + + /** @brief convertToFloat() helper function converts the hardware-generated flow vectors to floating point representation (1 flow vector for gridSize). + * gridSize can be queried via function getGridSize(). + + @param flow Buffer of type CV_16FC2 containing flow vectors generated by calc(). + @param floatFlow Buffer of type CV_32FC2, containing flow vectors in floating point representation, each flow vector for 1 pixel per gridSize, in the pitch-linear layout. + */ + CV_WRAP virtual void convertToFloat(InputArray flow, InputOutputArray floatFlow) = 0; + + /** @brief Instantiate NVIDIA Optical Flow + + @param imageSize Size of input image in pixels. + @param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets. + Defaults to NV_OF_PERF_LEVEL_SLOW. + @param outputGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about output grid sizes. + Defaults to NV_OF_OUTPUT_VECTOR_GRID_SIZE_1. + @param hintGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about hint grid sizes. + Defaults to NV_OF_HINT_VECTOR_GRID_SIZE_1. + @param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors + generated in previous call to calc() as internal hints for the current call to calc(). + Useful when computing flow vectors between successive video frames. Defaults to false. + @param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false. + @param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false. + @param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0. + @param inputStream Optical flow algorithm may optionally involve cuda preprocessing on the input buffers. + The input cuda stream can be used to pipeline and synchronize the cuda preprocessing tasks with OF HW engine. + If input stream is not set, the execute function will use default stream which is NULL stream; + @param outputStream Optical flow algorithm may optionally involve cuda post processing on the output flow vectors. + The output cuda stream can be used to pipeline and synchronize the cuda post processing tasks with OF HW engine. 
+ If output stream is not set, the execute function will use default stream which is NULL stream; + */ + CV_WRAP static Ptr create( + cv::Size imageSize, + cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset + = cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_PERF_LEVEL_SLOW, + cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize + = cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1, + cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize + = cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_1, + bool enableTemporalHints = false, + bool enableExternalHints = false, + bool enableCostBuffer = false, + int gpuId = 0, + Stream& inputStream = Stream::Null(), + Stream& outputStream = Stream::Null()); + + /** @brief Instantiate NVIDIA Optical Flow with ROI Feature + + @param imageSize Size of input image in pixels. + @param roiData Pointer to ROI data. + @param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets. + Defaults to NV_OF_PERF_LEVEL_SLOW. + @param outputGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about output grid sizes. + Defaults to NV_OF_OUTPUT_VECTOR_GRID_SIZE_1. + @param hintGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about hint grid sizes. + Defaults to NV_OF_HINT_VECTOR_GRID_SIZE_1. + @param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors + generated in previous call to calc() as internal hints for the current call to calc(). + Useful when computing flow vectors between successive video frames. Defaults to false. + @param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false. + @param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false. + @param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0. + @param inputStream Optical flow algorithm may optionally involve cuda preprocessing on the input buffers. + The input cuda stream can be used to pipeline and synchronize the cuda preprocessing tasks with OF HW engine. + If input stream is not set, the execute function will use default stream which is NULL stream; + @param outputStream Optical flow algorithm may optionally involve cuda post processing on the output flow vectors. + The output cuda stream can be used to pipeline and synchronize the cuda post processing tasks with OF HW engine. 
+ If output stream is not set, the execute function will use default stream which is NULL stream; + */ + CV_WRAP static Ptr create( + cv::Size imageSize, + std::vector roiData, + cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset + = cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_PERF_LEVEL_SLOW, + cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize + = cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1, + cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize + = cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_1, bool enableTemporalHints = false, bool enableExternalHints = false, bool enableCostBuffer = false, diff --git a/modules/cudaoptflow/misc/python/test/test_nvidiaopticalflow.py b/modules/cudaoptflow/misc/python/test/test_nvidiaopticalflow.py new file mode 100644 index 000000000..94822c408 --- /dev/null +++ b/modules/cudaoptflow/misc/python/test/test_nvidiaopticalflow.py @@ -0,0 +1,36 @@ +import os +import cv2 as cv +import numpy as np + +from tests_common import NewOpenCVTests, unittest + +class nvidiaopticalflow_test(NewOpenCVTests): + def setUp(self): + super(nvidiaopticalflow_test, self).setUp() + if not cv.cuda.getCudaEnabledDeviceCount(): + self.skipTest("No CUDA-capable device is detected") + + @unittest.skipIf('OPENCV_TEST_DATA_PATH' not in os.environ, + "OPENCV_TEST_DATA_PATH is not defined") + def test_calc(self): + frame1 = os.environ['OPENCV_TEST_DATA_PATH'] + '/gpu/opticalflow/frame0.png' + frame2 = os.environ['OPENCV_TEST_DATA_PATH'] + '/gpu/opticalflow/frame1.png' + + npMat1 = cv.cvtColor(cv.imread(frame1),cv.COLOR_BGR2GRAY) + npMat2 = cv.cvtColor(cv.imread(frame2),cv.COLOR_BGR2GRAY) + + cuMat1 = cv.cuda_GpuMat(npMat1) + cuMat2 = cv.cuda_GpuMat(npMat2) + try: + nvof = cv.cuda_NvidiaOpticalFlow_1_0.create(cuMat1.shape[1], cuMat1.shape[0], 5, False, False, False, 0) + flow = nvof.calc(cuMat1, cuMat2, None) + self.assertTrue(flow.shape[1] > 0 and flow.shape[0] > 0) + flowUpSampled = nvof.upSampler(flow[0], cuMat1.shape[1], cuMat1.shape[0], nvof.getGridSize(), None) + nvof.collectGarbage() + except cv.error as e: + if e.code == cv.Error.StsBadFunc or e.code == cv.Error.StsBadArg or e.code == cv.Error.StsNullPtr: + self.skipTest("Algorithm is not supported in the current environment") + self.assertTrue(flowUpSampled.shape[1] > 0 and flowUpSampled.shape[0] > 0) + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() \ No newline at end of file diff --git a/modules/cudaoptflow/perf/perf_optflow.cpp b/modules/cudaoptflow/perf/perf_optflow.cpp index ceb4811f8..a9342c05d 100644 --- a/modules/cudaoptflow/perf/perf_optflow.cpp +++ b/modules/cudaoptflow/perf/perf_optflow.cpp @@ -339,13 +339,8 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0, const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame1.empty()); - - const int width = frame0.size().width; - const int height = frame0.size().height; - const bool enableTemporalHints = false; - const bool enableExternalHints = false; - const bool enableCostBuffer = false; - const int gpuid = 0; + Stream inputStream; + Stream outputStream; if (PERF_RUN_CUDA()) { @@ -355,9 +350,9 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0, cv::Ptr d_nvof; try { - d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height, + d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(), cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, - enableTemporalHints, enableExternalHints, enableCostBuffer, 
gpuid); + false, false, false, 0, inputStream, outputStream); } catch (const cv::Exception& e) { @@ -376,6 +371,63 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0, CUDA_SANITY_CHECK(u, 1e-10); CUDA_SANITY_CHECK(v, 1e-10); + + d_nvof->collectGarbage(); + } +} + +////////////////////////////////////////////////////// +// NvidiaOpticalFlow_2_0 + +PERF_TEST_P(ImagePair, NvidiaOpticalFlow_2_0, + Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) +{ + declare.time(10); + + const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + const cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outGridSize + = cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1; + const cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize + = cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_1; + Stream inputStream; + Stream outputStream; + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_frame0(frame0); + const cv::cuda::GpuMat d_frame1(frame1); + cv::cuda::GpuMat d_flow; + cv::Ptr d_nvof; + try + { + d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(), + cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, outGridSize, hintGridSize, + false, false, false, 0, inputStream, outputStream); + } + catch (const cv::Exception& e) + { + if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr) + throw SkipTestException("Current configuration is not supported"); + throw; + } + + TEST_CYCLE() d_nvof->calc(d_frame0, d_frame1, d_flow); + + cv::cuda::GpuMat flow[2]; + cv::cuda::split(d_flow, flow); + + cv::cuda::GpuMat u = flow[0]; + cv::cuda::GpuMat v = flow[1]; + + CUDA_SANITY_CHECK(u, 1e-10); + CUDA_SANITY_CHECK(v, 1e-10); + + d_nvof->collectGarbage(); } } diff --git a/modules/cudaoptflow/samples/nvidia_optical_flow.cpp b/modules/cudaoptflow/samples/nvidia_optical_flow.cpp index 478a5f161..5dd4a0345 100644 --- a/modules/cudaoptflow/samples/nvidia_optical_flow.cpp +++ b/modules/cudaoptflow/samples/nvidia_optical_flow.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include "opencv2/core.hpp" #include "opencv2/core/utility.hpp" @@ -11,7 +12,6 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/video/tracking.hpp" -using namespace std; using namespace cv; using namespace cv::cuda; @@ -131,12 +131,88 @@ static void drawOpticalFlow(const Mat_& flowx, const Mat_& flowy } } +/* +ROI config file format. 
+numrois 3 +roi0 640 96 1152 192 +roi1 640 64 896 864 +roi2 640 960 256 32 +*/ +bool parseROI(std::string ROIFileName, std::vector& roiData) +{ + std::string str; + uint32_t nRois = 0; + std::ifstream hRoiFile; + hRoiFile.open(ROIFileName, std::ios::in); + + if (hRoiFile.is_open()) + { + while (std::getline(hRoiFile, str)) + { + std::istringstream iss(str); + std::vector tokens{ std::istream_iterator{iss}, + std::istream_iterator{} }; + + if (tokens.size() == 0) continue; // if empty line, coninue + + transform(tokens[0].begin(), tokens[0].end(), tokens[0].begin(), ::tolower); + if (tokens[0] == "numrois") + { + nRois = atoi(tokens[1].data()); + } + else if (tokens[0].rfind("roi", 0) == 0) + { + cv::Rect roi; + roi.x = atoi(tokens[1].data()); + roi.y = atoi(tokens[2].data()); + roi.width = atoi(tokens[3].data()); + roi.height = atoi(tokens[4].data()); + roiData.push_back(roi); + } + else if (tokens[0].rfind("#", 0) == 0) + { + continue; + } + else + { + std::cout << "Unidentified keyword in roi config file " << tokens[0] << std::endl; + hRoiFile.close(); + return false; + } + } + } + else + { + std::cout << "Unable to open ROI file " << std::endl; + return false; + } + if (nRois != roiData.size()) + { + std::cout << "NumRois(" << nRois << ")and specified roi rects (" << roiData.size() << ")are not matching " << std::endl; + hRoiFile.close(); + return false; + } + hRoiFile.close(); + return true; +} + int main(int argc, char **argv) { - std::unordered_map presetMap = { - { "slow", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW }, - { "medium", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_MEDIUM }, - { "fast", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST } }; + std::unordered_map presetMap = { + { "slow", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW }, + { "medium", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_MEDIUM }, + { "fast", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST } }; + + std::unordered_map outputGridSize = { + { 1, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 }, + { 2, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 }, + { 4, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_4 } }; + + std::unordered_map hintGridSize = { + { 1, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_1 }, + { 2, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_2 }, + { 4, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_4 }, + { 8, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_8 } }; try { @@ -145,7 +221,10 @@ int main(int argc, char **argv) "{ r right | ../data/basketball2.png | specify right image }" "{ g gpuid | 0 | cuda device index}" "{ p preset | slow | perf preset for OF algo [ options : slow, medium, fast ]}" + "{ og outputGridSize | 1 | Output grid size of OF vector [ options : 1, 2, 4 ]}" + "{ hg hintGridSize | 1 | Hint grid size of OF vector [ options : 1, 2, 4, 8 ]}" "{ o output | OpenCVNvOF.flo | output flow vector file in middlebury format}" + "{ rc roiConfigFile | | Region of Interest config file }" "{ th enableTemporalHints | false | Enable temporal hints}" "{ eh enableExternalHints | false | Enable external hints}" "{ cb enableCostBuffer | false | Enable output cost buffer}" @@ -159,60 +238,93 @@ int 
main(int argc, char **argv) return 0; } - string pathL = cmd.get("left"); - string pathR = cmd.get("right"); - string preset = cmd.get("preset"); - string output = cmd.get("output"); + std::string pathL = cmd.get("left"); + std::string pathR = cmd.get("right"); + std::string preset = cmd.get("preset"); + std::string output = cmd.get("output"); + std::string roiConfiFile = cmd.get("roiConfigFile"); bool enableExternalHints = cmd.get("enableExternalHints"); bool enableTemporalHints = cmd.get("enableTemporalHints"); bool enableCostBuffer = cmd.get("enableCostBuffer"); int gpuId = cmd.get("gpuid"); + int outputBufferGridSize = cmd.get("outputGridSize"); + int hintBufferGridSize = cmd.get("hintGridSize"); - if (pathL.empty()) cout << "Specify left image path\n"; - if (pathR.empty()) cout << "Specify right image path\n"; - if (preset.empty()) cout << "Specify perf preset for OpticalFlow algo\n"; + if (pathL.empty()) std::cout << "Specify left image path" << std::endl; + if (pathR.empty()) std::cout << "Specify right image path" << std::endl; + if (preset.empty()) std::cout << "Specify perf preset for OpticalFlow algo" << std::endl; if (pathL.empty() || pathR.empty()) return 0; - auto search = presetMap.find(preset); - if (search == presetMap.end()) + auto p = presetMap.find(preset); + if (p == presetMap.end()) { std::cout << "Invalid preset level : " << preset << std::endl; return 0; } - NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset = search->second; + NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset = p->second; + + auto o = outputGridSize.find(outputBufferGridSize); + if (o == outputGridSize.end()) + { + std::cout << "Invalid output grid size: " << outputBufferGridSize << std::endl; + return 0; + } + NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outBufGridSize = o->second; + + NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintBufGridSize = + NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED; + if (enableExternalHints) + { + auto h = hintGridSize.find(hintBufferGridSize); + if (h == hintGridSize.end()) + { + std::cout << "Invalid hint grid size: " << hintBufferGridSize << std::endl; + return 0; + } + hintBufGridSize = h->second; + } + + std::vector roiData; + + if (!roiConfiFile.empty()) + { + if (!parseROI(roiConfiFile, roiData)) + { + std::cout << "Wrong Region of Interest config file, proceeding without ROI" << std::endl; + } + } Mat frameL = imread(pathL, IMREAD_GRAYSCALE); Mat frameR = imread(pathR, IMREAD_GRAYSCALE); - if (frameL.empty()) cout << "Can't open '" << pathL << "'\n"; - if (frameR.empty()) cout << "Can't open '" << pathR << "'\n"; + if (frameL.empty()) std::cout << "Can't open '" << pathL << "'" << std::endl; + if (frameR.empty()) std::cout << "Can't open '" << pathR << "'" << std::endl; if (frameL.empty() || frameR.empty()) return -1; - Ptr nvof = NvidiaOpticalFlow_1_0::create( - frameL.size().width, frameL.size().height, perfPreset, + Ptr nvof = NvidiaOpticalFlow_2_0::create( + frameL.size(), roiData, perfPreset, outBufGridSize, hintBufGridSize, enableTemporalHints, enableExternalHints, enableCostBuffer, gpuId); - Mat flowx, flowy, flowxy, upsampledFlowXY, image; + Mat flowx, flowy, flowxy, floatFlow, image; nvof->calc(frameL, frameR, flowxy); - nvof->upSampler(flowxy, frameL.size().width, frameL.size().height, - nvof->getGridSize(), upsampledFlowXY); + nvof->convertToFloat(flowxy, floatFlow); - if (output.size() != 0) + if (!output.empty()) { - if (!writeOpticalFlow(output, upsampledFlowXY)) - cout << "Failed to save Flow 
Vector" << endl; + if (!writeOpticalFlow(output, floatFlow)) + std::cout << "Failed to save Flow Vector" << std::endl; else - cout << "Flow vector saved as '" << output << "'\n"; + std::cout << "Flow vector saved as '" << output << "'" << std::endl; } Mat planes[] = { flowx, flowy }; - split(upsampledFlowXY, planes); + split(floatFlow, planes); flowx = planes[0]; flowy = planes[1]; drawOpticalFlow(flowx, flowy, image, 10); - imshow("Colorize image",image); + imshow("Colorize image", image); waitKey(0); nvof->collectGarbage(); } diff --git a/modules/cudaoptflow/samples/optical_flow.cpp b/modules/cudaoptflow/samples/optical_flow.cpp index c1353614e..4afd2dc0a 100644 --- a/modules/cudaoptflow/samples/optical_flow.cpp +++ b/modules/cudaoptflow/samples/optical_flow.cpp @@ -183,8 +183,11 @@ int main(int argc, const char* argv[]) Ptr lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7)); Ptr farn = cuda::FarnebackOpticalFlow::create(); Ptr tvl1 = cuda::OpticalFlowDual_TVL1::create(); - Ptr nvof = cuda::NvidiaOpticalFlow_1_0::create(frame0.size().width, frame0.size().height, + Ptr nvof_1_0 = cuda::NvidiaOpticalFlow_1_0::create(frame0.size(), NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, false, false, false, 0, inputStream, outputStream); + Ptr nvof_2_0 = cuda::NvidiaOpticalFlow_2_0::create(frame0.size(), + NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1, + NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED, false, false, false, 0, inputStream, outputStream); { GpuMat d_frame0f; @@ -242,16 +245,32 @@ int main(int argc, const char* argv[]) //Hence it is expected to be more than what is displayed in the NVIDIA Optical Flow SDK documentation. const int64 start = getTickCount(); - nvof->calc(d_frame0, d_frame1, d_flowxy); + nvof_1_0->calc(d_frame0, d_frame1, d_flowxy); const double timeSec = (getTickCount() - start) / getTickFrequency(); - cout << "NVIDIAOpticalFlow : " << timeSec << " sec" << endl; + cout << "NVIDIAOpticalFlow_1_0 : " << timeSec << " sec" << endl; - nvof->upSampler(d_flowxy, frame0.size().width, frame0.size().height, - nvof->getGridSize(), d_flow); + nvof_1_0->upSampler(d_flowxy, frame0.size(), nvof_1_0->getGridSize(), d_flow); - showFlow("NVIDIAOpticalFlow", d_flow); - nvof->collectGarbage(); + showFlow("NVIDIAOpticalFlow_1_0", d_flow); + nvof_1_0->collectGarbage(); + } + + { + //The timing displayed below includes the time taken to copy the input buffers to the OF CUDA input buffers + //and to copy the output buffers from the OF CUDA output buffer to the output buffer. + //Hence it is expected to be more than what is displayed in the NVIDIA Optical Flow SDK documentation. + const int64 start = getTickCount(); + + nvof_2_0->calc(d_frame0, d_frame1, d_flowxy); + + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "NVIDIAOpticalFlow_2_0 : " << timeSec << " sec" << endl; + + nvof_2_0->convertToFloat(d_flowxy, d_flow); + + showFlow("NVIDIAOpticalFlow_2_0", d_flow); + nvof_2_0->collectGarbage(); } imshow("Frame 0", frame0); diff --git a/modules/cudaoptflow/src/cuda/nvidiaOpticalFlow.cu b/modules/cudaoptflow/src/cuda/nvidiaOpticalFlow.cu new file mode 100644 index 000000000..1ad21143e --- /dev/null +++ b/modules/cudaoptflow/src/cuda/nvidiaOpticalFlow.cu @@ -0,0 +1,99 @@ +// +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +//M*/ +#if !defined CUDA_DISABLER + +#include +#include + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef signed short int16_t; +typedef signed int int32_t; + +#define BLOCKDIM_X 32 +#define BLOCKDIM_Y 16 + +// data required to do 2x upsampling. Same can be used for 4x upsampling also +#define SMEM_COLS ((BLOCKDIM_X)/2) +#define SMEM_ROWS ((BLOCKDIM_Y)/2) + +namespace cv { namespace cuda { namespace device { namespace optflow_nvidia +{ +static const char *_cudaGetErrorEnum(cudaError_t error) { return cudaGetErrorName(error); } + +template <typename T> +void check(T result, char const *const func, const char *const file, + int const line) { + if (result) { + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line, + static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); + // Make sure we call CUDA Device Reset before exiting + exit(EXIT_FAILURE); + } +} +#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) + +template <typename T> +static __device__ void ReadDevPtrData(void* devptr, uint32_t x0, uint32_t y0, uint32_t src_w, uint32_t src_h, uint32_t src_pitch, + T src[][SMEM_COLS], uint32_t i, uint32_t j) +{ + uint32_t shift = (sizeof(T) == sizeof(int32_t)) ? 2 : 1; + src[j][i] = *(T*)((uint8_t*)devptr + y0 * src_pitch + (x0 << shift)); +} + + +extern "C" +__global__ void NearestNeighborFlowKernel(cudaSurfaceObject_t srcSurf, void* srcDevPtr, uint32_t src_w, uint32_t src_pitch, uint32_t src_h, + cudaSurfaceObject_t dstSurf, void* dstDevPtr, uint32_t dst_w, uint32_t dst_pitch, uint32_t dst_h, + uint32_t nScaleFactor) +{ + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; + + int x0 = x / nScaleFactor; + int y0 = y / nScaleFactor; + + __shared__ short2 src[SMEM_ROWS][SMEM_COLS]; + + int i = threadIdx.x / nScaleFactor; + int j = threadIdx.y / nScaleFactor; + + if ((x % nScaleFactor == 0) && (y % nScaleFactor == 0)) + { + ReadDevPtrData(srcDevPtr, x0, y0, src_w, src_h, src_pitch, src, i, j); + } + __syncthreads(); + + if (x < dst_w && y < dst_h) + { + if (dstDevPtr == NULL) + { + surf2Dwrite(src[j][i], dstSurf, x * sizeof(short2), y, cudaBoundaryModeClamp); + } + else + { + *(short2*)((uint8_t*)dstDevPtr + y * dst_pitch + (x << 2)) = src[j][i]; + } + } +} + +void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint32_t nSrcHeight, + void* dstDevPtr, uint32_t nDstWidth, uint32_t nDstPitch, uint32_t nDstHeight, + uint32_t nScaleFactor) +{ + + dim3 blockDim(BLOCKDIM_X, BLOCKDIM_Y); + dim3 gridDim((nDstWidth + blockDim.x - 1) / blockDim.x, (nDstHeight + blockDim.y - 1) / blockDim.y); + NearestNeighborFlowKernel <<<gridDim, blockDim>>> (0, srcDevPtr, nSrcWidth, nSrcPitch, nSrcHeight, + 0, dstDevPtr, nDstWidth, nDstPitch, nDstHeight, + nScaleFactor); + + checkCudaErrors(cudaGetLastError()); +}}}}} + +#endif \ No newline at end of file diff --git a/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp b/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp index b5c760da2..df7156989 100644 --- a/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp +++ b/modules/cudaoptflow/src/nvidiaOpticalFlow.cpp @@ -8,11 +8,27 @@ #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) { throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); } +cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>
cv::cuda::NvidiaOpticalFlow_1_0::create + (int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) { + throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); } + +cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create( + int, int, NVIDIA_OF_PERF_LEVEL, NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE, + bool, int, cv::Rect*, bool, bool, bool, int, Stream&, Stream&) { + throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0>(); +} #elif !defined HAVE_NVIDIA_OPTFLOW -cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) +cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create( + int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) +{ + CV_Error(cv::Error::HeaderIsNull, "OpenCV was built without NVIDIA OpticalFlow support"); +} + +cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create( + int, int, NVIDIA_OF_PERF_LEVEL, NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE, + bool, int, cv::Rect*, bool, bool, bool, int, Stream&, Stream&) { CV_Error(cv::Error::HeaderIsNull, "OpenCV was built without NVIDIA OpticalFlow support"); } @@ -22,6 +38,13 @@ cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create #include "nvOpticalFlowCommon.h" #include "nvOpticalFlowCuda.h" +namespace cv { namespace cuda { namespace device { namespace optflow_nvidia +{ +void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint32_t nSrcHeight, + void* dstDevPtr, uint32_t nDstWidth, uint32_t nDstPitch, uint32_t nDstHeight, + uint32_t nScaleFactor); +}}}} + #if defined(_WIN32) || defined(_WIN64) #include #else @@ -123,7 +146,7 @@ class LoadNvidiaModules private: typedef int(*PFNCudaCuCtxGetCurrent)(CUcontext*); typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda) - (uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf); + (int apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf); PFNCudaCuCtxGetCurrent m_cudaDriverAPIGetCurrentCtx; PFNNvOFAPICreateInstanceCuda m_NvOFAPICreateInstanceCuda; @@ -249,9 +272,9 @@ private: NV_OF_BUFFER_DESCRIPTOR m_hintBufferDesc; NV_OF_BUFFER_DESCRIPTOR m_costBufferDesc; - uint32_t m_outputElementSize; - uint32_t m_costBufElementSize; - uint32_t m_hintBufElementSize; + int m_outputElementSize; + int m_costBufElementSize; + int m_hintBufElementSize; NV_OF_INIT_PARAMS m_initParams; @@ -288,7 +311,7 @@ protected: std::mutex m_lock; public: - NvidiaOpticalFlowImpl(int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints, + NvidiaOpticalFlowImpl(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream); virtual void calc(InputArray inputImage, InputArray referenceImage, @@ -298,17 +321,17 @@ public: virtual void collectGarbage(); - virtual void upSampler(InputArray flow, int width, int height, + virtual void upSampler(InputArray flow, cv::Size imageSize, int gridSize, InputOutputArray upsampledFlow); virtual int getGridSize() const { return m_gridSize; } }; NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl( - int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints, + cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream) : - m_width(width), m_height(height), m_preset(perfPreset), + m_width(imageSize.width), m_height(imageSize.height), m_preset(perfPreset), m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints),
m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints), m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId), @@ -371,7 +394,7 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl( m_costBufferDesc.height = nOutHeight; m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT; m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST; - m_costBufElementSize = sizeof(uint32_t); + m_costBufElementSize = sizeof(int); } m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST()); @@ -600,7 +623,7 @@ void NvidiaOpticalFlowImpl::collectGarbage() } } -void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height, +void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, cv::Size imageSize, int gridSize, InputOutputArray upsampledFlow) { Mat flow; @@ -622,26 +645,26 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height, std::unique_ptr flowVectors = nullptr; const NV_OF_FLOW_VECTOR* _flowVectors = static_cast((const void*)flow.data); - flowVectors.reset(new float[2 * width * height]); - for (int y = 0; y < height; ++y) + flowVectors.reset(new float[2 * imageSize.width * imageSize.height]); + for (int y = 0; y < imageSize.height; ++y) { - for (int x = 0; x < width; ++x) + for (int x = 0; x < imageSize.width; ++x) { - uint32_t blockIdX = x / gridSize; - uint32_t blockIdY = y / gridSize; - uint32_t widthInBlocks = ((width + gridSize - 1) / gridSize); - uint32_t heightInBlocks = ((height + gridSize - 1) / gridSize);; + int blockIdX = x / gridSize; + int blockIdY = y / gridSize; + int widthInBlocks = ((imageSize.width + gridSize - 1) / gridSize); + int heightInBlocks = ((imageSize.height + gridSize - 1) / gridSize);; if ((blockIdX < widthInBlocks) && (blockIdY < heightInBlocks)) { - flowVectors[(y * 2 * width) + 2 * x] = (float) + flowVectors[(y * 2 * imageSize.width) + 2 * x] = (float) (_flowVectors[blockIdX + (blockIdY * widthInBlocks)].flowx / (float)(1 << 5)); - flowVectors[(y * 2 * width) + 2 * x + 1] = (float) + flowVectors[(y * 2 * imageSize.width) + 2 * x + 1] = (float) (_flowVectors[blockIdX + (blockIdY * widthInBlocks)].flowy / (float)(1 << 5)); } } } - Mat output(Size(width, height), CV_32FC2, flowVectors.get()); + Mat output(Size(imageSize.width, imageSize.height), CV_32FC2, flowVectors.get()); if (upsampledFlow.isMat()) { output.copyTo(upsampledFlow); @@ -656,18 +679,603 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height, CV_Error(Error::StsBadArg, "Incorrect flow buffer passed for upsampled flow. 
Pass either Mat or GpuMat"); } +} + +class NvidiaOpticalFlowImpl_2 : public cv::cuda::NvidiaOpticalFlow_2_0 +{ +private: + int m_width; + int m_height; + NV_OF_PERF_LEVEL m_preset; + NV_OF_OUTPUT_VECTOR_GRID_SIZE m_gridSize; + NV_OF_HINT_VECTOR_GRID_SIZE m_hintGridSize; + bool m_enableROI; + std::vector m_roiDataRect; + NV_OF_ROI_RECT* m_roiData; + bool m_enableTemporalHints; + bool m_enableExternalHints; + bool m_enableCostBuffer; + int m_gpuId; + Stream m_inputStream; + Stream m_outputStream; + + CUcontext m_cuContext; + int m_scaleFactor; + NV_OF_BUFFER_FORMAT m_format; + NV_OF_OUTPUT_VECTOR_GRID_SIZE m_hwGridSize; + + NV_OF_BUFFER_DESCRIPTOR m_inputBufferDesc; + NV_OF_BUFFER_DESCRIPTOR m_outputBufferDesc; + NV_OF_BUFFER_DESCRIPTOR m_hintBufferDesc; + NV_OF_BUFFER_DESCRIPTOR m_costBufferDesc; + + int m_outputElementSize; + int m_costBufElementSize; + int m_hintBufElementSize; + + NV_OF_INIT_PARAMS m_initParams; + + std::unique_ptr m_ofAPI; + NvOFHandle m_hOF; //nvof handle + + NvOFGPUBufferHandle m_hInputBuffer; + NvOFGPUBufferHandle m_hReferenceBuffer; + NvOFGPUBufferHandle m_hOutputBuffer; + NvOFGPUBufferHandle m_hOutputUpScaledBuffer; + NvOFGPUBufferHandle m_hHintBuffer; + NvOFGPUBufferHandle m_hCostBuffer; + + CUdeviceptr m_frame0cuDevPtr; + CUdeviceptr m_frame1cuDevPtr; + CUdeviceptr m_flowXYcuDevPtr; + CUdeviceptr m_flowXYUpScaledcuDevPtr; + CUdeviceptr m_hintcuDevPtr; + CUdeviceptr m_costcuDevPtr; + + NV_OF_CUDA_BUFFER_STRIDE_INFO m_inputBufferStrideInfo; + NV_OF_CUDA_BUFFER_STRIDE_INFO m_referenceBufferStrideInfo; + NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputBufferStrideInfo; + NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputUpScaledBufferStrideInfo; + NV_OF_CUDA_BUFFER_STRIDE_INFO m_hintBufferStrideInfo; + NV_OF_CUDA_BUFFER_STRIDE_INFO m_costBufferStrideInfo; + + NV_OF_CUDA_API_FUNCTION_LIST* GetAPI() + { + std::lock_guard lock(m_lock); + return m_ofAPI.get(); + } + + NvOFHandle GetHandle() { return m_hOF; } + +protected: + std::mutex m_lock; + +public: + NvidiaOpticalFlowImpl_2(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, + NV_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NV_OF_HINT_VECTOR_GRID_SIZE hintGridSize, + bool bEnableROI, std::vector roiData, bool bEnableTemporalHints, + bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream); + + virtual void calc(InputArray inputImage, InputArray referenceImage, + InputOutputArray flow, Stream& stream = Stream::Null(), + InputArray hint = cv::noArray(), OutputArray cost = cv::noArray()); + + virtual void collectGarbage(); + + virtual void convertToFloat(InputArray flow, InputOutputArray floatFlow); + + virtual int getGridSize() const { return m_gridSize; } +}; + +NvidiaOpticalFlowImpl_2::NvidiaOpticalFlowImpl_2( + cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, + NV_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NV_OF_HINT_VECTOR_GRID_SIZE hintGridSize, + bool bEnableROI, std::vector roiData, bool bEnableTemporalHints, + bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream) : + m_width(imageSize.width), m_height(imageSize.height), m_preset(perfPreset), + m_gridSize(outputGridSize), m_hintGridSize(hintGridSize), + m_enableROI(bEnableROI), m_roiDataRect(roiData), + m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints), + m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints), + m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId), + m_inputStream(inputStream), m_outputStream(outputStream), + m_cuContext(nullptr), m_scaleFactor(1), 
m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8), + m_hwGridSize((NV_OF_OUTPUT_VECTOR_GRID_SIZE)0) +{ + LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init(); + + int nGpu = 0; + + cuSafeCall(cudaGetDeviceCount(&nGpu)); + if (m_gpuId < 0 || m_gpuId >= nGpu) + { + CV_Error(Error::StsBadArg, "Invalid GPU Ordinal"); + } + + cuSafeCall(cudaSetDevice(m_gpuId)); + cuSafeCall(cudaFree(m_cuContext)); + + cuSafeCall(LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr()(&m_cuContext)); + + if (m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 && + m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 && + m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_4) + { + CV_Error(Error::StsBadArg, "Unsupported output grid size"); + } + + if (m_enableExternalHints) + { + if (m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_1 && + m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_2 && + m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_4 && + m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_8) + { + CV_Error(Error::StsBadArg, "Unsupported hint grid size"); + } + } + + m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST()); + + NVOF_API_CALL(LoadNvidiaModulesObj.GetOFLibraryFunctionPtr()(NV_OF_API_VERSION, m_ofAPI.get())); + NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF)); + + m_roiData = (NV_OF_ROI_RECT*)m_roiDataRect.data(); + + uint32_t size = 0; + if (m_enableROI) + { + NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORT_ROI, nullptr, &size)); + std::unique_ptr val1(new uint32_t[size]); + NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORT_ROI, val1.get(), &size)); + if (val1[0] != NV_OF_TRUE) + { + m_enableROI = false; + m_roiData = nullptr; + CV_Error(Error::StsBadFunc, "ROI not supported on this GPU"); + } + } + size = 0; + NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, nullptr, &size)); + std::unique_ptr val2(new uint32_t[size]); + NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, val2.get(), &size)); + for (uint32_t i = 0; i < size; i++) + { + if (m_gridSize != val2[i]) + { + size = 0; + NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, nullptr, &size)); + std::unique_ptr val3(new uint32_t[size]); + NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, val3.get(), &size)); + + m_hwGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX; + for (uint32_t i = 0; i < size; i++) + { + if (m_gridSize == val3[i]) + { + m_hwGridSize = m_gridSize; + break; + } + if (m_gridSize < val3[i] && val3[i] < m_hwGridSize) + { + m_hwGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)val3[i]; + } + } + if (m_hwGridSize >= (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX) + { + CV_Error(Error::StsBadArg, "Invalid Grid Size"); + } + else + { + m_scaleFactor = m_hwGridSize / m_gridSize; + } + } + else + { + m_hwGridSize = m_gridSize; + } + } + + auto nOutWidth = (m_width + m_hwGridSize - 1) / m_hwGridSize; + auto nOutHeight = (m_height + m_hwGridSize - 1) / m_hwGridSize; + + auto outBufFmt = NV_OF_BUFFER_FORMAT_SHORT2; + + memset(&m_inputBufferDesc, 0, sizeof(m_inputBufferDesc)); + m_inputBufferDesc.width = m_width; + m_inputBufferDesc.height = m_height; + m_inputBufferDesc.bufferFormat = m_format; + 
m_inputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT; + + memset(&m_outputBufferDesc, 0, sizeof(m_outputBufferDesc)); + m_outputBufferDesc.width = nOutWidth; + m_outputBufferDesc.height = nOutHeight; + m_outputBufferDesc.bufferFormat = outBufFmt; + m_outputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT; + m_outputElementSize = sizeof(NV_OF_FLOW_VECTOR); + + if (m_enableExternalHints) + { + memset(&m_hintBufferDesc, 0, sizeof(m_hintBufferDesc)); + m_hintBufferDesc.width = nOutWidth; + m_hintBufferDesc.height = nOutHeight; + m_hintBufferDesc.bufferFormat = outBufFmt; + m_hintBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_HINT; + m_hintBufElementSize = m_outputElementSize; + } + + if (m_enableCostBuffer) + { + memset(&m_costBufferDesc, 0, sizeof(m_costBufferDesc)); + m_costBufferDesc.width = nOutWidth; + m_costBufferDesc.height = nOutHeight; + m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT8; + m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST; + m_costBufElementSize = sizeof(int); + } + + memset(&m_initParams, 0, sizeof(m_initParams)); + m_initParams.width = m_inputBufferDesc.width; + m_initParams.height = m_inputBufferDesc.height; + m_initParams.enableExternalHints = (NV_OF_BOOL)m_enableExternalHints; + m_initParams.enableOutputCost = (NV_OF_BOOL)m_enableCostBuffer; + m_initParams.hintGridSize = (NV_OF_BOOL)m_enableExternalHints == NV_OF_TRUE ? + m_hintGridSize : (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED; + m_initParams.outGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)m_hwGridSize; + m_initParams.mode = NV_OF_MODE_OPTICALFLOW; + m_initParams.perfLevel = m_preset; + m_initParams.enableRoi = (NV_OF_BOOL)m_enableROI; + + NVOF_API_CALL(GetAPI()->nvOFInit(GetHandle(), &m_initParams)); + + if (m_inputStream || m_outputStream) + { + NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(), + StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream))); + } + + //Input Buffer 1 + NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), + &m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hInputBuffer)); + m_frame0cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hInputBuffer); + NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( + m_hInputBuffer, &m_inputBufferStrideInfo)); + + //Input Buffer 2 + NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), + &m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hReferenceBuffer)); + m_frame1cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hReferenceBuffer); + NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( + m_hReferenceBuffer, &m_referenceBufferStrideInfo)); + + //Output Buffer + NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), + &m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputBuffer)); + m_flowXYcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputBuffer); + NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( + m_hOutputBuffer, &m_outputBufferStrideInfo)); + + if (m_scaleFactor > 1) + { + m_outputBufferDesc.width = (m_width + m_gridSize - 1) / m_gridSize;; + m_outputBufferDesc.height = (m_height + m_gridSize - 1) / m_gridSize;; + + //Output UpScaled Buffer + NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), + &m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputUpScaledBuffer)); + m_flowXYUpScaledcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputUpScaledBuffer); + NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( + m_hOutputUpScaledBuffer, &m_outputUpScaledBufferStrideInfo)); + } + + 
//Hint Buffer + if (m_enableExternalHints) + { + NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), + &m_hintBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hHintBuffer)); + m_hintcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hHintBuffer); + NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( + m_hHintBuffer, &m_hintBufferStrideInfo)); + } + + //Cost Buffer + if (m_enableCostBuffer) + { + NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), + &m_costBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hCostBuffer)); + m_costcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hCostBuffer); + NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( + m_hCostBuffer, &m_costBufferStrideInfo)); + } +} + +void NvidiaOpticalFlowImpl_2::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, + Stream& stream, InputArray hint, OutputArray cost) +{ + CV_UNUSED(stream); + GpuMat frame0GpuMat(_frame0.size(), _frame0.type(), (void*)m_frame0cuDevPtr, + m_inputBufferStrideInfo.strideInfo[0].strideXInBytes); + GpuMat frame1GpuMat(_frame1.size(), _frame1.type(), (void*)m_frame1cuDevPtr, + m_referenceBufferStrideInfo.strideInfo[0].strideXInBytes); + GpuMat flowXYGpuMat(Size((m_width + m_hwGridSize - 1) / m_hwGridSize, + (m_height + m_hwGridSize - 1) / m_hwGridSize), CV_16SC2, + (void*)m_flowXYcuDevPtr, m_outputBufferStrideInfo.strideInfo[0].strideXInBytes); + GpuMat flowXYGpuMatUpScaled(Size((m_width + m_gridSize - 1) / m_gridSize, + (m_height + m_gridSize - 1) / m_gridSize), CV_16SC2, + (void*)m_flowXYUpScaledcuDevPtr, m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes); + + //check whether frame0 is Mat or GpuMat + if (_frame0.isMat()) + { + //Get Mats from InputArrays + Mat __frame0 = _frame0.getMat(); + frame0GpuMat.upload(__frame0, m_inputStream); + } + else if (_frame0.isGpuMat()) + { + //Get GpuMats from InputArrays + GpuMat __frame0 = _frame0.getGpuMat(); + __frame0.copyTo(frame0GpuMat, m_inputStream); + } + else + { + CV_Error(Error::StsBadArg, + "Incorrect input. Pass input image (frame0) as Mat or GpuMat"); + } + + //check whether frame1 is Mat or GpuMat + if (_frame1.isMat()) + { + //Get Mats from InputArrays + Mat __frame1 = _frame1.getMat(); + frame1GpuMat.upload(__frame1, m_inputStream); + } + else if (_frame1.isGpuMat()) + { + //Get GpuMats from InputArrays + GpuMat __frame1 = _frame1.getGpuMat(); + __frame1.copyTo(frame1GpuMat, m_inputStream); + } + else + { + CV_Error(Error::StsBadArg, + "Incorrect input. Pass reference image (frame1) as Mat or GpuMat"); + } + + if (m_enableExternalHints) + { + GpuMat hintGpuMat(hint.size(), hint.type(), (void*)m_hintcuDevPtr, + m_hintBufferStrideInfo.strideInfo[0].strideXInBytes); + + if (hint.isMat()) + { + //Get Mat from InputArray hint + Mat _hint = hint.getMat(); + hintGpuMat.upload(_hint, m_inputStream); + } + else if (hint.isGpuMat()) + { + //Get GpuMat from InputArray hint + GpuMat _hint = hint.getGpuMat(); + _hint.copyTo(hintGpuMat, m_inputStream); + } + else + { + CV_Error(Error::StsBadArg, "Incorrect hint buffer passed. Pass Mat or GpuMat"); + } + } + + //Execute Call + NV_OF_EXECUTE_INPUT_PARAMS exeInParams; + NV_OF_EXECUTE_OUTPUT_PARAMS exeOutParams; + memset(&exeInParams, 0, sizeof(exeInParams)); + exeInParams.inputFrame = m_hInputBuffer; + exeInParams.referenceFrame = m_hReferenceBuffer; + exeInParams.disableTemporalHints = (NV_OF_BOOL)m_enableTemporalHints == NV_OF_TRUE ? + NV_OF_FALSE : NV_OF_TRUE; + exeInParams.externalHints = m_initParams.enableExternalHints == NV_OF_TRUE ? 
+ m_hHintBuffer : nullptr; + exeInParams.numRois = m_initParams.enableRoi == NV_OF_TRUE ? m_roiDataRect.size() : 0; + exeInParams.roiData = m_initParams.enableRoi == NV_OF_TRUE ? m_roiData : nullptr; + memset(&exeOutParams, 0, sizeof(exeOutParams)); + exeOutParams.outputBuffer = m_hOutputBuffer; + exeOutParams.outputCostBuffer = m_initParams.enableOutputCost == NV_OF_TRUE ? + m_hCostBuffer : nullptr; + NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams)); + + if (m_scaleFactor > 1) + { + uint32_t nSrcWidth = flowXYGpuMat.size().width; + uint32_t nSrcHeight = flowXYGpuMat.size().height; + uint32_t nSrcPitch = m_outputBufferStrideInfo.strideInfo[0].strideXInBytes; + uint32_t nDstWidth = flowXYGpuMatUpScaled.size().width; + uint32_t nDstHeight = flowXYGpuMatUpScaled.size().height; + uint32_t nDstPitch = m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes; + cv::cuda::device::optflow_nvidia::FlowUpsample((void*)m_flowXYcuDevPtr, nSrcWidth, nSrcPitch, + nSrcHeight, (void*)m_flowXYUpScaledcuDevPtr, nDstWidth, nDstPitch, nDstHeight, m_scaleFactor); + + if (_flow.isMat()) + flowXYGpuMatUpScaled.download(_flow, m_outputStream); + else if (_flow.isGpuMat()) + flowXYGpuMatUpScaled.copyTo(_flow, m_outputStream); + else + CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat"); + } + else + { + if (_flow.isMat()) + flowXYGpuMat.download(_flow, m_outputStream); + else if (_flow.isGpuMat()) + flowXYGpuMat.copyTo(_flow, m_outputStream); + else + CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat"); + } + + if (m_enableCostBuffer) + { + GpuMat costGpuMat(Size((m_width + m_hwGridSize - 1) / m_hwGridSize, + (m_height + m_hwGridSize - 1) / m_hwGridSize), CV_8SC1, (void*)m_costcuDevPtr, + m_costBufferStrideInfo.strideInfo[0].strideXInBytes); + + if (cost.isMat()) + costGpuMat.download(cost, m_outputStream); + else if (cost.isGpuMat()) + costGpuMat.copyTo(cost, m_outputStream); + else + CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat"); + } + m_outputStream.waitForCompletion(); +} + +void NvidiaOpticalFlowImpl_2::collectGarbage() +{ + if (m_enableROI) + { + m_roiData = nullptr; + } + if (m_hInputBuffer) + { + NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hInputBuffer)); + } + if (m_hReferenceBuffer) + { + NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hReferenceBuffer)); + } + if (m_hOutputBuffer) + { + NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputBuffer)); + } + if (m_scaleFactor > 1 && m_hOutputUpScaledBuffer) + { + NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputUpScaledBuffer)); + } + if (m_enableExternalHints) + { + if (m_hHintBuffer) + { + NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hHintBuffer)); + } + } + if (m_enableCostBuffer) + { + if (m_hCostBuffer) + { + NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer)); + } + } + if (m_inputStream) + { + m_inputStream.waitForCompletion(); + } + if (m_outputStream) + { + m_outputStream.waitForCompletion(); + } + if (m_hOF) + { + NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF)); + } +} + +void NvidiaOpticalFlowImpl_2::convertToFloat(InputArray _flow, InputOutputArray floatFlow) +{ + Mat flow; + if (_flow.isMat()) + { + Mat __flow = _flow.getMat(); + __flow.copyTo(flow); + } + else if (_flow.isGpuMat()) + { + GpuMat __flow = _flow.getGpuMat(); + __flow.download(flow); + } + else + { + CV_Error(Error::StsBadArg, + "Incorrect flow buffer passed. 
Pass either Mat or GpuMat"); + } + + int width = flow.size().width; + int height = flow.size().height; + + Mat output(Size(width, height), CV_32FC2); + for (int y = 0; y < height; ++y) + { + for (int x = 0; x < (int)(width * sizeof(int16_t)); ++x) + { + output.at(y, x) = (float)(flow.at(y, x) / (float)(1 << 5)); + } + } + + if (floatFlow.isMat()) + { + output.copyTo(floatFlow); + } + else if (floatFlow.isGpuMat()) + { + GpuMat _output(output); + _output.copyTo(floatFlow); + } + else + { + CV_Error(Error::StsBadArg, + "Incorrect flow buffer passed for upsampled flow. Pass either Mat or GpuMat"); + } }} Ptr cv::cuda::NvidiaOpticalFlow_1_0::create( - int width, int height, NVIDIA_OF_PERF_LEVEL perfPreset, + cv::Size imageSize, NVIDIA_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream& inputStream, Stream& outputStream) { return makePtr( - width, - height, + imageSize, + (NV_OF_PERF_LEVEL)perfPreset, + bEnableTemporalHints, + bEnableExternalHints, + bEnableCostBuffer, + gpuId, + inputStream, + outputStream); +} + +Ptr cv::cuda::NvidiaOpticalFlow_2_0::create( + cv::Size imageSize, NVIDIA_OF_PERF_LEVEL perfPreset, + NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize, + bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, + int gpuId, Stream& inputStream, Stream& outputStream) +{ + std::vector roi(0); + return makePtr( + imageSize, + (NV_OF_PERF_LEVEL)perfPreset, + (NV_OF_OUTPUT_VECTOR_GRID_SIZE)outputGridSize, + (NV_OF_HINT_VECTOR_GRID_SIZE)hintGridSize, + false, + roi, + bEnableTemporalHints, + bEnableExternalHints, + bEnableCostBuffer, + gpuId, + inputStream, + outputStream); +} + +Ptr cv::cuda::NvidiaOpticalFlow_2_0::create( + cv::Size imageSize, std::vector roiData, NVIDIA_OF_PERF_LEVEL perfPreset, + NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize, + bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, + int gpuId, Stream& inputStream, Stream& outputStream) +{ + return makePtr( + imageSize, (NV_OF_PERF_LEVEL)perfPreset, + (NV_OF_OUTPUT_VECTOR_GRID_SIZE)outputGridSize, + (NV_OF_HINT_VECTOR_GRID_SIZE)hintGridSize, + true, + roiData, bEnableTemporalHints, bEnableExternalHints, bEnableCostBuffer, diff --git a/modules/cudaoptflow/test/test_optflow.cpp b/modules/cudaoptflow/test/test_optflow.cpp index 433c9ee9c..bbbec700e 100644 --- a/modules/cudaoptflow/test/test_optflow.cpp +++ b/modules/cudaoptflow/test/test_optflow.cpp @@ -495,19 +495,11 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression) cv::Mat frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame1.empty()); - const int width = frame0.size().width; - const int height = frame0.size().height; - const bool enableTemporalHints = false; - const bool enableExternalHints = false; - const bool enableCostBuffer = false; - const int gpuid = 0; - cv::Ptr d_nvof; try { - d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height, - cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW, - enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid); + d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(), + cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW); } catch (const cv::Exception& e) { @@ -519,7 +511,7 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression) Mat flow, upsampledFlow; d_nvof->calc(loadMat(frame0), loadMat(frame1), flow); - 
-    d_nvof->upSampler(flow, width, height, gridSize, upsampledFlow);
+    d_nvof->upSampler(flow, frame0.size(), gridSize, upsampledFlow);
 
     std::string fname(cvtest::TS::ptr()->get_data_path());
     fname += "opticalflow/nvofGolden.flo";
@@ -527,6 +519,7 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
     ASSERT_FALSE(golden.empty());
 
     EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
+    d_nvof->collectGarbage();
 }
 
 CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
@@ -539,19 +532,11 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
 
     cv::Mat r_frame0, r_frame1;
 
-    const int width = frame0.size().width;
-    const int height = frame0.size().height;
-    const bool enableTemporalHints = false;
-    const bool enableExternalHints = false;
-    const bool enableCostBuffer = false;
-    const int gpuid = 0;
-
     cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
     try
     {
-        d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
-            cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
-            enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
+        d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
+            cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
     }
     catch (const cv::Exception& e)
     {
@@ -569,9 +554,96 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
 
     EXPECT_TRUE(cv::checkRange(flowx));
     EXPECT_TRUE(cv::checkRange(flowy));
+    d_nvof->collectGarbage();
 };
 
 INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_1_0, ALL_DEVICES);
 
+//////////////////////////////////////////////////////
+// NvidiaOpticalFlow_2_0
+
+struct NvidiaOpticalFlow_2_0 : testing::TestWithParam<cv::cuda::DeviceInfo>
+{
+    cv::cuda::DeviceInfo devInfo;
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::cuda::setDevice(devInfo.deviceID());
+    }
+};
+
+CUDA_TEST_P(NvidiaOpticalFlow_2_0, Regression)
+{
+    cv::Mat frame0 = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty());
+
+    cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
+    try
+    {
+        d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
+            cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
+    }
+    catch (const cv::Exception& e)
+    {
+        if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
+            throw SkipTestException("Current configuration is not supported");
+        throw;
+    }
+
+    Mat flow, upsampledFlow;
+    d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
+    d_nvof->convertToFloat(flow, upsampledFlow);
+
+    std::string fname(cvtest::TS::ptr()->get_data_path());
+    fname += "opticalflow/nvofGolden_2.flo";
+    cv::Mat golden = cv::readOpticalFlow(fname.c_str());
+    ASSERT_FALSE(golden.empty());
+
+    EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
+    d_nvof->collectGarbage();
+}
+
+CUDA_TEST_P(NvidiaOpticalFlow_2_0, OpticalFlowNan)
+{
+    cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1.empty());
+
+    cv::Mat r_frame0, r_frame1;
+
+    cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
+    try
+    {
+        d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
+            cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
+    }
+    catch (const cv::Exception& e)
+    {
+        if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
+            throw SkipTestException("Current configuration is not supported");
+        throw;
+    }
+
+    Mat flow, flowx, flowy;
+    d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
+
+    Mat planes[] = { flowx, flowy };
+    split(flow, planes);
+    flowx = planes[0]; flowy = planes[1];
+
+    EXPECT_TRUE(cv::checkRange(flowx));
+    EXPECT_TRUE(cv::checkRange(flowy));
+    d_nvof->collectGarbage();
+};
+
+INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_2_0, ALL_DEVICES);
+
 }} // namespace
 #endif // HAVE_CUDA
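
A minimal usage sketch of the Size-based NvidiaOpticalFlow_2_0 API added by this patch, modeled on the Regression test above; it is not part of the diff and assumes OpenCV is built with HAVE_NVIDIA_OPTFLOW, an NVOF-capable (Turing or newer) GPU is available, and the input file names are placeholders.

// Hedged sketch only, not the shipped sample; file names are placeholders.
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/cudaoptflow.hpp>

int main()
{
    // Any pair of equally sized grayscale frames works here.
    cv::Mat frame0 = cv::imread("frame0.png", cv::IMREAD_GRAYSCALE);
    cv::Mat frame1 = cv::imread("frame1.png", cv::IMREAD_GRAYSCALE);
    CV_Assert(!frame0.empty() && frame0.size() == frame1.size());

    // create() now takes a cv::Size instead of separate width/height arguments.
    cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> nvof =
        cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
            cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);

    // Upload the frames and run the hardware flow; the raw output is fixed-point.
    cv::cuda::GpuMat d_frame0(frame0), d_frame1(frame1);
    cv::Mat flow, floatFlow;
    nvof->calc(d_frame0, d_frame1, flow);

    // convertToFloat() (new in 2.0) rescales the vectors (divides by 1 << 5) into CV_32FC2.
    nvof->convertToFloat(flow, floatFlow);

    nvof->collectGarbage();  // release NVOF buffers and the session handle
    return 0;
}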