You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1318 lines
50 KiB
1318 lines
50 KiB
// |
|
// This file is part of OpenCV project. |
|
// It is subject to the license terms in the LICENSE file found in the top-level directory |
|
// of this distribution and at http://opencv.org/license.html. |
|
// |
|
//M*/ |
|
#include "precomp.hpp" |
|
|
|
#if !defined HAVE_CUDA || defined(CUDA_DISABLER) |
|
|
|
// Fallback factory compiled when HAVE_CUDA is undefined or CUDA_DISABLER is defined.
// All arguments are ignored; the function only calls throw_no_cuda().
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
    cv::Size, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&)
{
    throw_no_cuda();
    // Only reached if throw_no_cuda() returns: hand back an empty pointer.
    return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>();
}
|
|
|
// Fallback factory compiled when HAVE_CUDA is undefined or CUDA_DISABLER is defined.
// All arguments are ignored; the function only calls throw_no_cuda().
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
    cv::Size, NVIDIA_OF_PERF_LEVEL,
    NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE,
    bool, bool, bool, int, Stream&, Stream&)
{
    throw_no_cuda();
    // Only reached if throw_no_cuda() returns: hand back an empty pointer.
    return cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0>();
}
|
|
|
// Fallback factory (overload taking a std::vector<Rect>) compiled when HAVE_CUDA is
// undefined or CUDA_DISABLER is defined. All arguments are ignored; the function only
// calls throw_no_cuda().
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
    cv::Size, std::vector<Rect>, NVIDIA_OF_PERF_LEVEL,
    NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE,
    bool, bool, bool, int, Stream&, Stream&)
{
    throw_no_cuda();
    // Only reached if throw_no_cuda() returns: hand back an empty pointer.
    return cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0>();
}
|
|
|
#elif !defined HAVE_NVIDIA_OPTFLOW |
|
|
|
// Fallback factory compiled when CUDA is enabled but HAVE_NVIDIA_OPTFLOW is not defined.
// All arguments are ignored and an OpenCV error is always raised through CV_Error.
// NOTE(review): the error code HeaderIsNull is kept for compatibility with existing callers.
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
    cv::Size, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&)
{
    CV_Error(cv::Error::HeaderIsNull, "OpenCV was built without NVIDIA OpticalFlow support");
}
|
|
|
// Fallback factory compiled when CUDA is enabled but HAVE_NVIDIA_OPTFLOW is not defined.
// All arguments are ignored and an OpenCV error is always raised through CV_Error.
// NOTE(review): the error code HeaderIsNull is kept for compatibility with existing callers.
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
    cv::Size, NVIDIA_OF_PERF_LEVEL,
    NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE,
    bool, bool, bool, int, Stream&, Stream&)
{
    CV_Error(cv::Error::HeaderIsNull, "OpenCV was built without NVIDIA OpticalFlow support");
}
|
|
|
// Fallback factory (overload taking a std::vector<Rect>) compiled when CUDA is enabled but
// HAVE_NVIDIA_OPTFLOW is not defined. All arguments are ignored and an OpenCV error is
// always raised through CV_Error.
// NOTE(review): the error code HeaderIsNull is kept for compatibility with existing callers.
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
    cv::Size, std::vector<Rect>, NVIDIA_OF_PERF_LEVEL,
    NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE, NVIDIA_OF_HINT_VECTOR_GRID_SIZE,
    bool, bool, bool, int, Stream&, Stream&)
{
    CV_Error(cv::Error::HeaderIsNull, "OpenCV was built without NVIDIA OpticalFlow support");
}
|
|
|
#else |
|
|
|
#include "nvOpticalFlowCommon.h" |
|
#include "nvOpticalFlowCuda.h" |
|
|
|
namespace cv {
namespace cuda {
namespace device {
namespace optflow_nvidia {

// Forward declaration only; the definition is not in this part of the file.
// The arguments describe a source buffer and a destination buffer (pointer, width, pitch,
// height each) plus a scale factor.
// NOTE(review): the units of the pitch arguments and the element layout are not visible
// here -- confirm against the definition before relying on them.
void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint32_t nSrcHeight,
                  void* dstDevPtr, uint32_t nDstWidth, uint32_t nDstPitch, uint32_t nDstHeight,
                  uint32_t nScaleFactor);

} // namespace optflow_nvidia
} // namespace device
} // namespace cuda
} // namespace cv
|
|
|
#if defined(_WIN32) || defined(_WIN64) |
|
#include <Windows.h> |
|
#else |
|
#define HMODULE void * |
|
#define _stricmp strcasecmp |
|
#include <dlfcn.h> |
|
#endif |
|
|
|
// Names of the shared libraries that are loaded at run time:
//   OF_MODULENAME   - NVIDIA Optical Flow library
//   CUDA_MODULENAME - CUDA driver library
#if defined(_WIN64) || defined(_WIN32)
#  if defined(_WIN64)
#    define OF_MODULENAME TEXT("nvofapi64.dll")
#  else
#    define OF_MODULENAME TEXT("nvofapi.dll")
#  endif
#  define CUDA_MODULENAME TEXT("nvcuda.dll")
#else
#  define OF_MODULENAME "libnvidia-opticalflow.so.1"
#  define CUDA_MODULENAME "libcuda.so"
#endif
|
|
|
// Evaluates an NVIDIA Optical Flow API expression exactly once and, when the returned
// NV_OF_STATUS differs from NV_OF_SUCCESS, raises an OpenCV error through CV_Error.
//
// The error text is "<stringified expression> returned error <numeric status>:<status name>".
// Numeric statuses 1..11 map to the status names and OpenCV error codes listed below.
// Any other non-success status is reported as StsError instead of being silently ignored.
// The message stream is only built on the failure path.
//
// `Error::` is resolved at the expansion site, so the macro must be used where cv::Error is
// visible (this file does so via its using-directives).
#define NVOF_API_CALL(nvOFAPI)                                                            \
    do                                                                                    \
    {                                                                                     \
        const NV_OF_STATUS errorCode = (nvOFAPI);                                         \
        if (errorCode != NV_OF_SUCCESS)                                                   \
        {                                                                                 \
            const char* nvofStatusName = "NV_OF_ERR_UNKNOWN";                             \
            int nvofCvErrorCode = Error::StsError;                                        \
            switch (static_cast<int>(errorCode))                                          \
            {                                                                             \
            case 1:                                                                       \
                nvofStatusName = "NV_OF_ERR_OF_NOT_AVAILABLE";                            \
                nvofCvErrorCode = Error::StsBadFunc;                                      \
                break;                                                                    \
            case 2:                                                                       \
                nvofStatusName = "NV_OF_ERR_UNSUPPORTED_DEVICE";                          \
                nvofCvErrorCode = Error::StsBadArg;                                       \
                break;                                                                    \
            case 3:                                                                       \
                nvofStatusName = "NV_OF_ERR_DEVICE_DOES_NOT_EXIST";                       \
                nvofCvErrorCode = Error::StsBadArg;                                       \
                break;                                                                    \
            case 4:                                                                       \
                nvofStatusName = "NV_OF_ERR_INVALID_PTR";                                 \
                nvofCvErrorCode = Error::StsNullPtr;                                      \
                break;                                                                    \
            case 5:                                                                       \
                nvofStatusName = "NV_OF_ERR_INVALID_PARAM";                               \
                nvofCvErrorCode = Error::StsBadArg;                                       \
                break;                                                                    \
            case 6:                                                                       \
                nvofStatusName = "NV_OF_ERR_INVALID_CALL";                                \
                nvofCvErrorCode = Error::BadCallBack;                                     \
                break;                                                                    \
            case 7:                                                                       \
                nvofStatusName = "NV_OF_ERR_INVALID_VERSION";                             \
                nvofCvErrorCode = Error::StsError;                                        \
                break;                                                                    \
            case 8:                                                                       \
                nvofStatusName = "NV_OF_ERR_OUT_OF_MEMORY";                               \
                nvofCvErrorCode = Error::StsNoMem;                                        \
                break;                                                                    \
            case 9:                                                                       \
                nvofStatusName = "NV_OF_ERR_NOT_INITIALIZED";                             \
                nvofCvErrorCode = Error::StsBadArg;                                       \
                break;                                                                    \
            case 10:                                                                      \
                nvofStatusName = "NV_OF_ERR_UNSUPPORTED_FEATURE";                         \
                nvofCvErrorCode = Error::StsBadArg;                                       \
                break;                                                                    \
            case 11:                                                                      \
                nvofStatusName = "NV_OF_ERR_GENERIC";                                     \
                nvofCvErrorCode = Error::StsInternal;                                     \
                break;                                                                    \
            default:                                                                      \
                /* unrecognised status: keep the generic name/code chosen above */        \
                break;                                                                    \
            }                                                                             \
            std::ostringstream errorLog;                                                  \
            errorLog << #nvOFAPI << " returned error " << (unsigned int)errorCode;        \
            errorLog << ":" << nvofStatusName;                                            \
            CV_Error(nvofCvErrorCode, errorLog.str());                                    \
        }                                                                                 \
    } while (0)
|
|
|
using namespace std; |
|
using namespace cv; |
|
using namespace cv::cuda; |
|
|
|
namespace |
|
{ |
|
class LoadNvidiaModules |
|
{ |
|
private: |
|
typedef int(*PFNCudaCuCtxGetCurrent)(CUcontext*); |
|
typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda) |
|
(int apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf); |
|
|
|
PFNCudaCuCtxGetCurrent m_cudaDriverAPIGetCurrentCtx; |
|
PFNNvOFAPICreateInstanceCuda m_NvOFAPICreateInstanceCuda; |
|
HMODULE m_hOFModule; |
|
HMODULE m_hCudaModule; |
|
bool m_isFailed; |
|
|
|
LoadNvidiaModules() : |
|
m_cudaDriverAPIGetCurrentCtx(NULL), |
|
m_NvOFAPICreateInstanceCuda(NULL), |
|
m_isFailed(false) |
|
{ |
|
//Loading Cuda Library |
|
#if defined(_WIN32) || defined(_WIN64) |
|
HMODULE hCudaModule = LoadLibrary(CUDA_MODULENAME); |
|
#else |
|
void *hCudaModule = dlopen(CUDA_MODULENAME, RTLD_LAZY); |
|
#endif |
|
|
|
if (hCudaModule == NULL) |
|
{ |
|
m_isFailed = true; |
|
CV_Error(Error::StsBadFunc, "Cannot find Cuda library."); |
|
} |
|
m_hCudaModule = hCudaModule; |
|
|
|
#if defined(_WIN32) |
|
m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)GetProcAddress(m_hCudaModule, "cuCtxGetCurrent"); |
|
#else |
|
m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)dlsym(m_hCudaModule, "cuCtxGetCurrent"); |
|
#endif |
|
if (!m_cudaDriverAPIGetCurrentCtx) |
|
{ |
|
m_isFailed = true; |
|
CV_Error(Error::StsBadFunc, |
|
"Cannot find Cuda Driver API : cuCtxGetCurrent() entry in Cuda library"); |
|
} |
|
|
|
//Loading Optical Flow Library |
|
#if defined(_WIN32) || defined(_WIN64) |
|
HMODULE hOFModule = LoadLibrary(OF_MODULENAME); |
|
#else |
|
void *hOFModule = dlopen(OF_MODULENAME, RTLD_LAZY); |
|
#endif |
|
|
|
if (hOFModule == NULL) |
|
{ |
|
m_isFailed = true; |
|
CV_Error(Error::StsBadFunc, "Cannot find NvOF library."); |
|
} |
|
m_hOFModule = hOFModule; |
|
|
|
#if defined(_WIN32) |
|
m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hOFModule, "NvOFAPICreateInstanceCuda"); |
|
#else |
|
m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)dlsym(m_hOFModule, "NvOFAPICreateInstanceCuda"); |
|
#endif |
|
if (!m_NvOFAPICreateInstanceCuda) |
|
{ |
|
m_isFailed = true; |
|
CV_Error(Error::StsBadFunc, |
|
"Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library"); |
|
} |
|
}; |
|
|
|
~LoadNvidiaModules() |
|
{ |
|
if (NULL != m_hCudaModule) |
|
{ |
|
#if defined(_WIN32) || defined(_WIN64) |
|
FreeLibrary(m_hCudaModule); |
|
#else |
|
dlclose(m_hCudaModule); |
|
#endif |
|
} |
|
if (NULL != m_hOFModule) |
|
{ |
|
#if defined(_WIN32) || defined(_WIN64) |
|
FreeLibrary(m_hOFModule); |
|
#else |
|
dlclose(m_hOFModule); |
|
#endif |
|
} |
|
m_hCudaModule = NULL; |
|
m_hOFModule = NULL; |
|
m_cudaDriverAPIGetCurrentCtx = NULL; |
|
m_NvOFAPICreateInstanceCuda = NULL; |
|
} |
|
|
|
public: |
|
static LoadNvidiaModules& Init() |
|
{ |
|
static LoadNvidiaModules LoadLibraryObj; |
|
if (LoadLibraryObj.m_isFailed) |
|
CV_Error(Error::StsError, "Can't initialize LoadNvidiaModules Class Object"); |
|
return LoadLibraryObj; |
|
} |
|
|
|
PFNCudaCuCtxGetCurrent GetCudaLibraryFunctionPtr() { return m_cudaDriverAPIGetCurrentCtx; } |
|
PFNNvOFAPICreateInstanceCuda GetOFLibraryFunctionPtr() { return m_NvOFAPICreateInstanceCuda; } |
|
}; |
|
|
|
class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0 |
|
{ |
|
private: |
|
int m_width; |
|
int m_height; |
|
NV_OF_PERF_LEVEL m_preset; |
|
bool m_enableTemporalHints; |
|
bool m_enableExternalHints; |
|
bool m_enableCostBuffer; |
|
int m_gpuId; |
|
Stream m_inputStream; |
|
Stream m_outputStream; |
|
|
|
CUcontext m_cuContext; |
|
NV_OF_BUFFER_FORMAT m_format; |
|
|
|
NV_OF_OUTPUT_VECTOR_GRID_SIZE m_gridSize; |
|
|
|
NV_OF_BUFFER_DESCRIPTOR m_inputBufferDesc; |
|
NV_OF_BUFFER_DESCRIPTOR m_outputBufferDesc; |
|
NV_OF_BUFFER_DESCRIPTOR m_hintBufferDesc; |
|
NV_OF_BUFFER_DESCRIPTOR m_costBufferDesc; |
|
|
|
int m_outputElementSize; |
|
int m_costBufElementSize; |
|
int m_hintBufElementSize; |
|
|
|
NV_OF_INIT_PARAMS m_initParams; |
|
|
|
std::unique_ptr<NV_OF_CUDA_API_FUNCTION_LIST> m_ofAPI; |
|
NvOFHandle m_hOF; //nvof handle |
|
|
|
NvOFGPUBufferHandle m_hInputBuffer; |
|
NvOFGPUBufferHandle m_hReferenceBuffer; |
|
NvOFGPUBufferHandle m_hOutputBuffer; |
|
NvOFGPUBufferHandle m_hHintBuffer; |
|
NvOFGPUBufferHandle m_hCostBuffer; |
|
|
|
CUdeviceptr m_frame0cuDevPtr; |
|
CUdeviceptr m_frame1cuDevPtr; |
|
CUdeviceptr m_flowXYcuDevPtr; |
|
CUdeviceptr m_hintcuDevPtr; |
|
CUdeviceptr m_costcuDevPtr; |
|
|
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_inputBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_referenceBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_hintBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_costBufferStrideInfo; |
|
|
|
NV_OF_CUDA_API_FUNCTION_LIST* GetAPI() |
|
{ |
|
return m_ofAPI.get(); |
|
} |
|
|
|
NvOFHandle GetHandle() { return m_hOF; } |
|
|
|
public: |
|
NvidiaOpticalFlowImpl(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints, |
|
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream); |
|
|
|
virtual void calc(InputArray inputImage, InputArray referenceImage, |
|
InputOutputArray flow, Stream& stream = Stream::Null(), |
|
InputArray hint = cv::noArray(), OutputArray cost = cv::noArray()); |
|
|
|
|
|
virtual void collectGarbage(); |
|
|
|
virtual void upSampler(InputArray flow, cv::Size imageSize, |
|
int gridSize, InputOutputArray upsampledFlow); |
|
|
|
virtual int getGridSize() const { return m_gridSize; } |
|
|
|
~NvidiaOpticalFlowImpl(); |
|
}; |
|
|
|
// Creates an NVOF session for images of size `imageSize` on GPU `gpuId` and allocates the
// input, reference and output buffers, plus the hint / cost buffers when enabled.
//
// Raises an OpenCV error when the GPU ordinal is out of range or any NVOF call fails
// (see NVOF_API_CALL). If a failure happens after the session has been created, everything
// created so far is released before the error propagates; the destructor does not run for a
// partially constructed object, so without this the session and buffers would leak.
NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
    cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
    bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId,
    Stream inputStream, Stream outputStream) :
    m_width(imageSize.width), m_height(imageSize.height), m_preset(perfPreset),
    m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints),
    m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints),
    m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId),
    m_inputStream(inputStream), m_outputStream(outputStream),
    m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
    m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
{
    // Start from empty handles so that collectGarbage() only touches what was really created.
    m_hOF = nullptr;
    m_hInputBuffer = nullptr;
    m_hReferenceBuffer = nullptr;
    m_hOutputBuffer = nullptr;
    m_hHintBuffer = nullptr;
    m_hCostBuffer = nullptr;

    LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init();

    int nGpu = 0;

    // NOTE(review): cuSafeCall is applied to CUDA runtime calls here -- confirm it is the
    // intended checker for cudaError_t results.
    cuSafeCall(cudaGetDeviceCount(&nGpu));
    if (m_gpuId < 0 || m_gpuId >= nGpu)
    {
        CV_Error(Error::StsBadArg, "Invalid GPU Ordinal");
    }

    cuSafeCall(cudaSetDevice(m_gpuId));
    // m_cuContext is still nullptr here, so nothing is freed.
    // NOTE(review): presumably this forces creation of the CUDA context -- confirm.
    cuSafeCall(cudaFree(m_cuContext));

    // Fetch the current CUDA context through the dynamically loaded cuCtxGetCurrent.
    cuSafeCall(LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr()(&m_cuContext));

    // m_gridSize is fixed to NV_OF_OUTPUT_VECTOR_GRID_SIZE_4 by the initialiser list above.
    if (m_gridSize != NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
    {
        CV_Error(Error::StsBadArg, "Unsupported grid size");
    }

    // One output element per m_gridSize x m_gridSize block, rounded up.
    auto nOutWidth = (m_width + m_gridSize - 1) / m_gridSize;
    auto nOutHeight = (m_height + m_gridSize - 1) / m_gridSize;

    auto outBufFmt = NV_OF_BUFFER_FORMAT_SHORT2;

    memset(&m_inputBufferDesc, 0, sizeof(m_inputBufferDesc));
    m_inputBufferDesc.width = m_width;
    m_inputBufferDesc.height = m_height;
    m_inputBufferDesc.bufferFormat = m_format;
    m_inputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT;

    memset(&m_outputBufferDesc, 0, sizeof(m_outputBufferDesc));
    m_outputBufferDesc.width = nOutWidth;
    m_outputBufferDesc.height = nOutHeight;
    m_outputBufferDesc.bufferFormat = outBufFmt;
    m_outputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT;
    m_outputElementSize = sizeof(NV_OF_FLOW_VECTOR);

    if (m_enableExternalHints)
    {
        memset(&m_hintBufferDesc, 0, sizeof(m_hintBufferDesc));
        m_hintBufferDesc.width = nOutWidth;
        m_hintBufferDesc.height = nOutHeight;
        m_hintBufferDesc.bufferFormat = outBufFmt;
        m_hintBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_HINT;
        m_hintBufElementSize = m_outputElementSize;
    }

    if (m_enableCostBuffer)
    {
        memset(&m_costBufferDesc, 0, sizeof(m_costBufferDesc));
        m_costBufferDesc.width = nOutWidth;
        m_costBufferDesc.height = nOutHeight;
        m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT;
        m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST;
        m_costBufElementSize = sizeof(int);
    }

    m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST());

    try
    {
        NVOF_API_CALL(LoadNvidiaModulesObj.GetOFLibraryFunctionPtr()(NV_OF_API_VERSION, m_ofAPI.get()));
        NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF));

        memset(&m_initParams, 0, sizeof(m_initParams));
        m_initParams.width = m_inputBufferDesc.width;
        m_initParams.height = m_inputBufferDesc.height;
        m_initParams.enableExternalHints = (NV_OF_BOOL)m_enableExternalHints;
        m_initParams.enableOutputCost = (NV_OF_BOOL)m_enableCostBuffer;
        m_initParams.hintGridSize = (NV_OF_BOOL)m_enableExternalHints == NV_OF_TRUE ?
            NV_OF_HINT_VECTOR_GRID_SIZE_4 : NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED;
        m_initParams.outGridSize = m_gridSize;
        m_initParams.mode = NV_OF_MODE_OPTICALFLOW;
        m_initParams.perfLevel = m_preset;

        NVOF_API_CALL(GetAPI()->nvOFInit(GetHandle(), &m_initParams));

        if (m_inputStream || m_outputStream)
        {
            NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
                StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream)));
        }

        //Input Buffer 1
        NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
            &m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hInputBuffer));
        m_frame0cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hInputBuffer);
        NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
            m_hInputBuffer, &m_inputBufferStrideInfo));

        //Input Buffer 2
        NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
            &m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hReferenceBuffer));
        m_frame1cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hReferenceBuffer);
        NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
            m_hReferenceBuffer, &m_referenceBufferStrideInfo));

        //Output Buffer
        NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
            &m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputBuffer));
        m_flowXYcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputBuffer);
        NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
            m_hOutputBuffer, &m_outputBufferStrideInfo));

        //Hint Buffer
        if (m_enableExternalHints)
        {
            NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
                &m_hintBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hHintBuffer));
            m_hintcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hHintBuffer);
            NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
                m_hHintBuffer, &m_hintBufferStrideInfo));
        }

        //Cost Buffer
        if (m_enableCostBuffer)
        {
            NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
                &m_costBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hCostBuffer));
            m_costcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hCostBuffer);
            NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo(
                m_hCostBuffer, &m_costBufferStrideInfo));
        }
    }
    catch (...)
    {
        // Release whatever was created. A secondary failure during cleanup is ignored so
        // that the original error is the one the caller sees.
        try
        {
            NvidiaOpticalFlowImpl::collectGarbage();
        }
        catch (...)
        {
        }
        throw;
    }
}
|
|
|
// Computes optical flow between `_frame0` and `_frame1` and writes it to `_flow`.
//
// _frame0, _frame1 : Mat or GpuMat, CV_8UC1, exactly the size given at construction.
// _flow            : Mat or GpuMat receiving the CV_16SC2 flow grid.
// stream           : adopted as the input stream when the object has none yet.
// hint             : Mat or GpuMat, CV_16SC2, same size as the flow grid; read only when
//                    external hints were enabled at construction.
// cost             : Mat or GpuMat receiving the CV_32SC1 cost grid; written only when the
//                    cost buffer was enabled at construction.
//
// The inputs are copied into NVOF device buffers whose size was fixed at construction, so
// their size and type are validated first; a mismatch raises StsBadArg instead of copying
// past the end of those buffers. The call blocks until the output stream has completed.
void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow,
    Stream& stream, InputArray hint, OutputArray cost)
{
    if (stream && !m_inputStream)
    {
        m_inputStream = stream;
        NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
            StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream)));
    }

    const Size frameSize(m_width, m_height);
    // One grid cell per m_gridSize x m_gridSize pixel block, rounded up.
    const Size gridDims((m_width + m_gridSize - 1) / m_gridSize,
                        (m_height + m_gridSize - 1) / m_gridSize);

    // Validates one input array and copies it into the NVOF device buffer at `devPtr`
    // using the input stream.
    const auto copyToDevice = [this](InputArray src, const Size& expectedSize, int expectedType,
                                     CUdeviceptr devPtr, size_t strideInBytes,
                                     const char* badKindMsg, const char* badLayoutMsg)
    {
        const bool fromHost = src.isMat();
        if (!fromHost && !src.isGpuMat())
        {
            CV_Error(Error::StsBadArg, badKindMsg);
        }
        if (src.size() != expectedSize || src.type() != expectedType)
        {
            CV_Error(Error::StsBadArg, badLayoutMsg);
        }

        GpuMat deviceView(expectedSize, expectedType, (void*)devPtr, strideInBytes);
        if (fromHost)
        {
            deviceView.upload(src.getMat(), m_inputStream);
        }
        else
        {
            src.getGpuMat().copyTo(deviceView, m_inputStream);
        }
    };

    copyToDevice(_frame0, frameSize, CV_8UC1, m_frame0cuDevPtr,
        m_inputBufferStrideInfo.strideInfo[0].strideXInBytes,
        "Incorrect input. Pass input image (frame0) as Mat or GpuMat",
        "Input image (frame0) must be CV_8UC1 and match the image size given at creation");

    copyToDevice(_frame1, frameSize, CV_8UC1, m_frame1cuDevPtr,
        m_referenceBufferStrideInfo.strideInfo[0].strideXInBytes,
        "Incorrect input. Pass reference image (frame1) as Mat or GpuMat",
        "Reference image (frame1) must be CV_8UC1 and match the image size given at creation");

    if (m_enableExternalHints)
    {
        copyToDevice(hint, gridDims, CV_16SC2, m_hintcuDevPtr,
            m_hintBufferStrideInfo.strideInfo[0].strideXInBytes,
            "Incorrect hint buffer passed. Pass Mat or GpuMat",
            "Hint buffer must be CV_16SC2 and match the flow grid size");
    }

    //Execute Call
    NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
    NV_OF_EXECUTE_OUTPUT_PARAMS exeOutParams;
    memset(&exeInParams, 0, sizeof(exeInParams));
    exeInParams.inputFrame = m_hInputBuffer;
    exeInParams.referenceFrame = m_hReferenceBuffer;
    exeInParams.disableTemporalHints = (NV_OF_BOOL)m_enableTemporalHints == NV_OF_TRUE ?
        NV_OF_FALSE : NV_OF_TRUE;
    exeInParams.externalHints = m_initParams.enableExternalHints == NV_OF_TRUE ?
        m_hHintBuffer : nullptr;
    memset(&exeOutParams, 0, sizeof(exeOutParams));
    exeOutParams.outputBuffer = m_hOutputBuffer;
    exeOutParams.outputCostBuffer = m_initParams.enableOutputCost == NV_OF_TRUE ?
        m_hCostBuffer : nullptr;
    NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));

    // View over the NVOF output buffer; no allocation happens here.
    GpuMat flowXYGpuMat(gridDims, CV_16SC2,
        (void*)m_flowXYcuDevPtr, m_outputBufferStrideInfo.strideInfo[0].strideXInBytes);

    if (_flow.isMat())
        flowXYGpuMat.download(_flow, m_outputStream);
    else if (_flow.isGpuMat())
        flowXYGpuMat.copyTo(_flow, m_outputStream);
    else
        CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat");

    if (m_enableCostBuffer)
    {
        GpuMat costGpuMat(gridDims, CV_32SC1, (void*)m_costcuDevPtr,
            m_costBufferStrideInfo.strideInfo[0].strideXInBytes);

        if (cost.isMat())
            costGpuMat.download(cost, m_outputStream);
        else if (cost.isGpuMat())
            costGpuMat.copyTo(cost, m_outputStream);
        else
            CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
    }
    m_outputStream.waitForCompletion();
}
|
|
|
void NvidiaOpticalFlowImpl::collectGarbage() |
|
{ |
|
if (m_hInputBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hInputBuffer)); |
|
m_hInputBuffer = nullptr; |
|
} |
|
if (m_hReferenceBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hReferenceBuffer)); |
|
m_hReferenceBuffer = nullptr; |
|
} |
|
if (m_hOutputBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputBuffer)); |
|
m_hOutputBuffer = nullptr; |
|
} |
|
if (m_enableExternalHints) |
|
{ |
|
if (m_hHintBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hHintBuffer)); |
|
m_hHintBuffer = nullptr; |
|
} |
|
} |
|
if (m_enableCostBuffer) |
|
{ |
|
if (m_hCostBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer)); |
|
m_hCostBuffer = nullptr; |
|
} |
|
} |
|
if (m_inputStream) |
|
{ |
|
m_inputStream.waitForCompletion(); |
|
} |
|
if (m_outputStream) |
|
{ |
|
m_outputStream.waitForCompletion(); |
|
} |
|
if (m_hOF) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF)); |
|
m_hOF = nullptr; |
|
} |
|
} |
|
|
|
// Releases all NVOF resources via collectGarbage().
// collectGarbage() raises an OpenCV error when an NVOF call fails. Destructors are
// implicitly noexcept, so letting that error escape would terminate the process; it is
// therefore swallowed here on purpose.
NvidiaOpticalFlowImpl::~NvidiaOpticalFlowImpl()
{
    try
    {
        collectGarbage();
    }
    catch (...)
    {
        // Best-effort cleanup: nothing sensible can be done with the error at this point.
    }
}
|
|
|
// Expands a block-level flow grid to one floating-point flow vector per pixel.
//
// _flow         : Mat or GpuMat of NV_OF_FLOW_VECTOR elements, one per gridSize x gridSize
//                 pixel block (at least ceil(width / gridSize) x ceil(height / gridSize)).
// imageSize     : size of the upsampled result in pixels.
// gridSize      : edge length of one block in pixels; must be positive.
// upsampledFlow : Mat or GpuMat receiving a CV_32FC2 image of size imageSize.
//
// Each pixel receives the vector of the block it falls into, with both components divided
// by 32 (1 << 5). Raises StsBadArg for unsupported array kinds, a non-positive gridSize, a
// negative imageSize or a flow buffer too small for the requested image size.
void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, cv::Size imageSize,
    int gridSize, InputOutputArray upsampledFlow)
{
    Mat flow;
    if (_flow.isMat())
    {
        // Header only: rows are addressed through ptr() below, so no deep copy is needed.
        flow = _flow.getMat();
    }
    else if (_flow.isGpuMat())
    {
        GpuMat __flow = _flow.getGpuMat();
        __flow.download(flow);
    }
    else
    {
        CV_Error(Error::StsBadArg,
            "Incorrect flow buffer passed. Pass either Mat or GpuMat");
    }

    if (gridSize <= 0 || imageSize.width < 0 || imageSize.height < 0)
    {
        CV_Error(Error::StsBadArg,
            "Grid size must be positive and image size must not be negative");
    }

    const int widthInBlocks = (imageSize.width + gridSize - 1) / gridSize;
    const int heightInBlocks = (imageSize.height + gridSize - 1) / gridSize;
    const bool hasPixels = widthInBlocks > 0 && heightInBlocks > 0;

    if (hasPixels &&
        (flow.elemSize() != sizeof(NV_OF_FLOW_VECTOR) ||
         flow.cols < widthInBlocks || flow.rows < heightInBlocks))
    {
        CV_Error(Error::StsBadArg,
            "Flow buffer does not match the given image size and grid size");
    }

    // Divisor applied to the stored flow components.
    const float fixedPointScale = (float)(1 << 5);

    Mat output(Size(imageSize.width, imageSize.height), CV_32FC2);
    if (hasPixels)
    {
        for (int y = 0; y < imageSize.height; ++y)
        {
            const NV_OF_FLOW_VECTOR* blockRow = flow.ptr<NV_OF_FLOW_VECTOR>(y / gridSize);
            float* pixelRow = output.ptr<float>(y);
            for (int x = 0; x < imageSize.width; ++x)
            {
                const NV_OF_FLOW_VECTOR& blockFlow = blockRow[x / gridSize];
                pixelRow[2 * x] = (float)(blockFlow.flowx / fixedPointScale);
                pixelRow[2 * x + 1] = (float)(blockFlow.flowy / fixedPointScale);
            }
        }
    }

    if (upsampledFlow.isMat())
    {
        output.copyTo(upsampledFlow);
    }
    else if (upsampledFlow.isGpuMat())
    {
        GpuMat _output(output);
        _output.copyTo(upsampledFlow);
    }
    else
    {
        CV_Error(Error::StsBadArg,
            "Incorrect flow buffer passed for upsampled flow. Pass either Mat or GpuMat");
    }
}
|
|
|
class NvidiaOpticalFlowImpl_2 : public cv::cuda::NvidiaOpticalFlow_2_0 |
|
{ |
|
private: |
|
int m_width; |
|
int m_height; |
|
NV_OF_PERF_LEVEL m_preset; |
|
NV_OF_OUTPUT_VECTOR_GRID_SIZE m_gridSize; |
|
NV_OF_HINT_VECTOR_GRID_SIZE m_hintGridSize; |
|
bool m_enableROI; |
|
std::vector<Rect> m_roiDataRect; |
|
NV_OF_ROI_RECT* m_roiData; |
|
bool m_enableTemporalHints; |
|
bool m_enableExternalHints; |
|
bool m_enableCostBuffer; |
|
int m_gpuId; |
|
Stream m_inputStream; |
|
Stream m_outputStream; |
|
|
|
CUcontext m_cuContext; |
|
int m_scaleFactor; |
|
NV_OF_BUFFER_FORMAT m_format; |
|
NV_OF_OUTPUT_VECTOR_GRID_SIZE m_hwGridSize; |
|
|
|
NV_OF_BUFFER_DESCRIPTOR m_inputBufferDesc; |
|
NV_OF_BUFFER_DESCRIPTOR m_outputBufferDesc; |
|
NV_OF_BUFFER_DESCRIPTOR m_hintBufferDesc; |
|
NV_OF_BUFFER_DESCRIPTOR m_costBufferDesc; |
|
|
|
int m_outputElementSize; |
|
int m_costBufElementSize; |
|
int m_hintBufElementSize; |
|
|
|
NV_OF_INIT_PARAMS m_initParams; |
|
|
|
std::unique_ptr<NV_OF_CUDA_API_FUNCTION_LIST> m_ofAPI; |
|
NvOFHandle m_hOF; //nvof handle |
|
|
|
NvOFGPUBufferHandle m_hInputBuffer; |
|
NvOFGPUBufferHandle m_hReferenceBuffer; |
|
NvOFGPUBufferHandle m_hOutputBuffer; |
|
NvOFGPUBufferHandle m_hOutputUpScaledBuffer; |
|
NvOFGPUBufferHandle m_hHintBuffer; |
|
NvOFGPUBufferHandle m_hCostBuffer; |
|
|
|
CUdeviceptr m_frame0cuDevPtr; |
|
CUdeviceptr m_frame1cuDevPtr; |
|
CUdeviceptr m_flowXYcuDevPtr; |
|
CUdeviceptr m_flowXYUpScaledcuDevPtr; |
|
CUdeviceptr m_hintcuDevPtr; |
|
CUdeviceptr m_costcuDevPtr; |
|
|
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_inputBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_referenceBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_outputUpScaledBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_hintBufferStrideInfo; |
|
NV_OF_CUDA_BUFFER_STRIDE_INFO m_costBufferStrideInfo; |
|
|
|
NV_OF_CUDA_API_FUNCTION_LIST* GetAPI() |
|
{ |
|
return m_ofAPI.get(); |
|
} |
|
|
|
NvOFHandle GetHandle() { return m_hOF; } |
|
|
|
public: |
|
NvidiaOpticalFlowImpl_2(cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, |
|
NV_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NV_OF_HINT_VECTOR_GRID_SIZE hintGridSize, |
|
bool bEnableROI, std::vector<Rect> roiData, bool bEnableTemporalHints, |
|
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream); |
|
|
|
virtual void calc(InputArray inputImage, InputArray referenceImage, |
|
InputOutputArray flow, Stream& stream = Stream::Null(), |
|
InputArray hint = cv::noArray(), OutputArray cost = cv::noArray()); |
|
|
|
virtual void collectGarbage(); |
|
|
|
virtual void convertToFloat(InputArray flow, InputOutputArray floatFlow); |
|
|
|
virtual int getGridSize() const { return m_gridSize; } |
|
|
|
~NvidiaOpticalFlowImpl_2(); |
|
}; |
|
|
|
NvidiaOpticalFlowImpl_2::NvidiaOpticalFlowImpl_2( |
|
cv::Size imageSize, NV_OF_PERF_LEVEL perfPreset, |
|
NV_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NV_OF_HINT_VECTOR_GRID_SIZE hintGridSize, |
|
bool bEnableROI, std::vector<Rect> roiData, bool bEnableTemporalHints, |
|
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream) : |
|
m_width(imageSize.width), m_height(imageSize.height), m_preset(perfPreset), |
|
m_gridSize(outputGridSize), m_hintGridSize(hintGridSize), |
|
m_enableROI(bEnableROI), m_roiDataRect(roiData), |
|
m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints), |
|
m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints), |
|
m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId), |
|
m_inputStream(inputStream), m_outputStream(outputStream), |
|
m_cuContext(nullptr), m_scaleFactor(1), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8), |
|
m_hwGridSize((NV_OF_OUTPUT_VECTOR_GRID_SIZE)0) |
|
{ |
|
LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init(); |
|
|
|
int nGpu = 0; |
|
|
|
cuSafeCall(cudaGetDeviceCount(&nGpu)); |
|
if (m_gpuId < 0 || m_gpuId >= nGpu) |
|
{ |
|
CV_Error(Error::StsBadArg, "Invalid GPU Ordinal"); |
|
} |
|
|
|
cuSafeCall(cudaSetDevice(m_gpuId)); |
|
cuSafeCall(cudaFree(m_cuContext)); |
|
|
|
cuSafeCall(LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr()(&m_cuContext)); |
|
|
|
if (m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 && |
|
m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 && |
|
m_gridSize != (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_4) |
|
{ |
|
CV_Error(Error::StsBadArg, "Unsupported output grid size"); |
|
} |
|
|
|
if (m_enableExternalHints) |
|
{ |
|
if (m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_1 && |
|
m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_2 && |
|
m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_4 && |
|
m_hintGridSize != (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_8) |
|
{ |
|
CV_Error(Error::StsBadArg, "Unsupported hint grid size"); |
|
} |
|
} |
|
|
|
m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST()); |
|
|
|
NVOF_API_CALL(LoadNvidiaModulesObj.GetOFLibraryFunctionPtr()(NV_OF_API_VERSION, m_ofAPI.get())); |
|
NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF)); |
|
|
|
m_roiData = (NV_OF_ROI_RECT*)m_roiDataRect.data(); |
|
|
|
uint32_t size = 0; |
|
if (m_enableROI) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORT_ROI, nullptr, &size)); |
|
std::unique_ptr<uint32_t[]> val1(new uint32_t[size]); |
|
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORT_ROI, val1.get(), &size)); |
|
if (val1[0] != NV_OF_TRUE) |
|
{ |
|
m_enableROI = false; |
|
m_roiData = nullptr; |
|
CV_Error(Error::StsBadFunc, "ROI not supported on this GPU"); |
|
} |
|
} |
|
size = 0; |
|
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, nullptr, &size)); |
|
std::unique_ptr<uint32_t[]> val2(new uint32_t[size]); |
|
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, val2.get(), &size)); |
|
for (uint32_t i = 0; i < size; i++) |
|
{ |
|
if (m_gridSize != val2[i]) |
|
{ |
|
size = 0; |
|
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, nullptr, &size)); |
|
std::unique_ptr<uint32_t[]> val3(new uint32_t[size]); |
|
NVOF_API_CALL(GetAPI()->nvOFGetCaps(GetHandle(), NV_OF_CAPS_SUPPORTED_OUTPUT_GRID_SIZES, val3.get(), &size)); |
|
|
|
m_hwGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX; |
|
for (uint32_t i = 0; i < size; i++) |
|
{ |
|
if (m_gridSize == val3[i]) |
|
{ |
|
m_hwGridSize = m_gridSize; |
|
break; |
|
} |
|
if (m_gridSize < val3[i] && val3[i] < m_hwGridSize) |
|
{ |
|
m_hwGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)val3[i]; |
|
} |
|
} |
|
if (m_hwGridSize >= (NV_OF_OUTPUT_VECTOR_GRID_SIZE)NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX) |
|
{ |
|
CV_Error(Error::StsBadArg, "Invalid Grid Size"); |
|
} |
|
else |
|
{ |
|
m_scaleFactor = m_hwGridSize / m_gridSize; |
|
} |
|
} |
|
else |
|
{ |
|
m_hwGridSize = m_gridSize; |
|
} |
|
} |
|
|
|
auto nOutWidth = (m_width + m_hwGridSize - 1) / m_hwGridSize; |
|
auto nOutHeight = (m_height + m_hwGridSize - 1) / m_hwGridSize; |
|
|
|
auto outBufFmt = NV_OF_BUFFER_FORMAT_SHORT2; |
|
|
|
memset(&m_inputBufferDesc, 0, sizeof(m_inputBufferDesc)); |
|
m_inputBufferDesc.width = m_width; |
|
m_inputBufferDesc.height = m_height; |
|
m_inputBufferDesc.bufferFormat = m_format; |
|
m_inputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_INPUT; |
|
|
|
memset(&m_outputBufferDesc, 0, sizeof(m_outputBufferDesc)); |
|
m_outputBufferDesc.width = nOutWidth; |
|
m_outputBufferDesc.height = nOutHeight; |
|
m_outputBufferDesc.bufferFormat = outBufFmt; |
|
m_outputBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_OUTPUT; |
|
m_outputElementSize = sizeof(NV_OF_FLOW_VECTOR); |
|
|
|
if (m_enableExternalHints) |
|
{ |
|
memset(&m_hintBufferDesc, 0, sizeof(m_hintBufferDesc)); |
|
m_hintBufferDesc.width = nOutWidth; |
|
m_hintBufferDesc.height = nOutHeight; |
|
m_hintBufferDesc.bufferFormat = outBufFmt; |
|
m_hintBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_HINT; |
|
m_hintBufElementSize = m_outputElementSize; |
|
} |
|
|
|
if (m_enableCostBuffer) |
|
{ |
|
memset(&m_costBufferDesc, 0, sizeof(m_costBufferDesc)); |
|
m_costBufferDesc.width = nOutWidth; |
|
m_costBufferDesc.height = nOutHeight; |
|
m_costBufferDesc.bufferFormat = NV_OF_BUFFER_FORMAT_UINT8; |
|
m_costBufferDesc.bufferUsage = NV_OF_BUFFER_USAGE_COST; |
|
m_costBufElementSize = sizeof(int); |
|
} |
|
|
|
memset(&m_initParams, 0, sizeof(m_initParams)); |
|
m_initParams.width = m_inputBufferDesc.width; |
|
m_initParams.height = m_inputBufferDesc.height; |
|
m_initParams.enableExternalHints = (NV_OF_BOOL)m_enableExternalHints; |
|
m_initParams.enableOutputCost = (NV_OF_BOOL)m_enableCostBuffer; |
|
m_initParams.hintGridSize = (NV_OF_BOOL)m_enableExternalHints == NV_OF_TRUE ? |
|
m_hintGridSize : (NV_OF_HINT_VECTOR_GRID_SIZE)NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED; |
|
m_initParams.outGridSize = (NV_OF_OUTPUT_VECTOR_GRID_SIZE)m_hwGridSize; |
|
m_initParams.mode = NV_OF_MODE_OPTICALFLOW; |
|
m_initParams.perfLevel = m_preset; |
|
m_initParams.enableRoi = (NV_OF_BOOL)m_enableROI; |
|
|
|
NVOF_API_CALL(GetAPI()->nvOFInit(GetHandle(), &m_initParams)); |
|
|
|
if (m_inputStream || m_outputStream) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(), |
|
StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream))); |
|
} |
|
|
|
//Input Buffer 1 |
|
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), |
|
&m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hInputBuffer)); |
|
m_frame0cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hInputBuffer); |
|
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( |
|
m_hInputBuffer, &m_inputBufferStrideInfo)); |
|
|
|
//Input Buffer 2 |
|
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), |
|
&m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hReferenceBuffer)); |
|
m_frame1cuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hReferenceBuffer); |
|
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( |
|
m_hReferenceBuffer, &m_referenceBufferStrideInfo)); |
|
|
|
//Output Buffer |
|
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), |
|
&m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputBuffer)); |
|
m_flowXYcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputBuffer); |
|
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( |
|
m_hOutputBuffer, &m_outputBufferStrideInfo)); |
|
|
|
if (m_scaleFactor > 1) |
|
{ |
|
m_outputBufferDesc.width = (m_width + m_gridSize - 1) / m_gridSize;; |
|
m_outputBufferDesc.height = (m_height + m_gridSize - 1) / m_gridSize;; |
|
|
|
//Output UpScaled Buffer |
|
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), |
|
&m_outputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hOutputUpScaledBuffer)); |
|
m_flowXYUpScaledcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hOutputUpScaledBuffer); |
|
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( |
|
m_hOutputUpScaledBuffer, &m_outputUpScaledBufferStrideInfo)); |
|
} |
|
|
|
//Hint Buffer |
|
if (m_enableExternalHints) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), |
|
&m_hintBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hHintBuffer)); |
|
m_hintcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hHintBuffer); |
|
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( |
|
m_hHintBuffer, &m_hintBufferStrideInfo)); |
|
} |
|
|
|
//Cost Buffer |
|
if (m_enableCostBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(), |
|
&m_costBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hCostBuffer)); |
|
m_costcuDevPtr = GetAPI()->nvOFGPUBufferGetCUdeviceptr(m_hCostBuffer); |
|
NVOF_API_CALL(GetAPI()->nvOFGPUBufferGetStrideInfo( |
|
m_hCostBuffer, &m_costBufferStrideInfo)); |
|
} |
|
} |
|
|
|
// Runs one optical-flow estimation between an input and a reference frame.
//
// _frame0 : input image, Mat or GpuMat; copied into the pre-allocated input buffer.
// _frame1 : reference image, Mat or GpuMat; copied into the pre-allocated reference buffer.
// _flow   : receives the CV_16SC2 flow field, Mat or GpuMat. When m_scaleFactor > 1 the
//           hardware result is first passed through FlowUpsample and the up-scaled
//           buffer is returned instead of the raw hardware output.
// stream  : ignored; the copies below are issued on m_inputStream / m_outputStream.
// hint    : external hints, Mat or GpuMat; only read when m_enableExternalHints is set.
// cost    : receives the cost buffer, Mat or GpuMat; only written when m_enableCostBuffer is set.
//
// Reports Error::StsBadArg through CV_Error when one of the arrays that is actually
// used is neither a Mat nor a GpuMat. Waits for m_outputStream before returning.
void NvidiaOpticalFlowImpl_2::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow,
    Stream& stream, InputArray hint, OutputArray cost)
{
    CV_UNUSED(stream);

    // Non-owning views over the device buffers. The views take their size and
    // type from the caller's arrays, not from the buffer descriptors.
    GpuMat inputView(_frame0.size(), _frame0.type(), reinterpret_cast<void*>(m_frame0cuDevPtr),
        m_inputBufferStrideInfo.strideInfo[0].strideXInBytes);
    GpuMat referenceView(_frame1.size(), _frame1.type(), reinterpret_cast<void*>(m_frame1cuDevPtr),
        m_referenceBufferStrideInfo.strideInfo[0].strideXInBytes);

    // Flow dimensions: image size divided by the grid size, rounded up.
    const Size hwFlowSize((m_width + m_hwGridSize - 1) / m_hwGridSize,
        (m_height + m_hwGridSize - 1) / m_hwGridSize);
    const Size requestedFlowSize((m_width + m_gridSize - 1) / m_gridSize,
        (m_height + m_gridSize - 1) / m_gridSize);

    GpuMat hwFlowView(hwFlowSize, CV_16SC2, reinterpret_cast<void*>(m_flowXYcuDevPtr),
        m_outputBufferStrideInfo.strideInfo[0].strideXInBytes);
    // NOTE(review): this view is built even when m_scaleFactor <= 1, although the
    // constructor only fills in the up-scaled pointer and stride when
    // m_scaleFactor > 1 - confirm those members are initialised elsewhere.
    GpuMat upscaledFlowView(requestedFlowSize, CV_16SC2, reinterpret_cast<void*>(m_flowXYUpScaledcuDevPtr),
        m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes);

    // Stage frame0 (host upload or device-to-device copy).
    const bool frame0OnHost = _frame0.isMat();
    if (!frame0OnHost && !_frame0.isGpuMat())
    {
        CV_Error(Error::StsBadArg,
            "Incorrect input. Pass input image (frame0) as Mat or GpuMat");
    }
    if (frame0OnHost)
    {
        inputView.upload(_frame0.getMat(), m_inputStream);
    }
    else
    {
        _frame0.getGpuMat().copyTo(inputView, m_inputStream);
    }

    // Stage frame1 the same way; frame0 has already been queued at this point.
    const bool frame1OnHost = _frame1.isMat();
    if (!frame1OnHost && !_frame1.isGpuMat())
    {
        CV_Error(Error::StsBadArg,
            "Incorrect input. Pass reference image (frame1) as Mat or GpuMat");
    }
    if (frame1OnHost)
    {
        referenceView.upload(_frame1.getMat(), m_inputStream);
    }
    else
    {
        _frame1.getGpuMat().copyTo(referenceView, m_inputStream);
    }

    // Stage the external hints, if this instance was created with them enabled.
    if (m_enableExternalHints)
    {
        GpuMat hintView(hint.size(), hint.type(), reinterpret_cast<void*>(m_hintcuDevPtr),
            m_hintBufferStrideInfo.strideInfo[0].strideXInBytes);

        const bool hintOnHost = hint.isMat();
        if (!hintOnHost && !hint.isGpuMat())
        {
            CV_Error(Error::StsBadArg, "Incorrect hint buffer passed. Pass Mat or GpuMat");
        }
        if (hintOnHost)
        {
            hintView.upload(hint.getMat(), m_inputStream);
        }
        else
        {
            hint.getGpuMat().copyTo(hintView, m_inputStream);
        }
    }

    // Describe the execute call. Optional inputs/outputs follow what was
    // requested in m_initParams; everything else stays zeroed.
    const bool hintsActive = (m_initParams.enableExternalHints == NV_OF_TRUE);
    const bool roiActive = (m_initParams.enableRoi == NV_OF_TRUE);
    const bool costActive = (m_initParams.enableOutputCost == NV_OF_TRUE);

    NV_OF_EXECUTE_INPUT_PARAMS executeIn;
    memset(&executeIn, 0, sizeof(executeIn));
    executeIn.inputFrame = m_hInputBuffer;
    executeIn.referenceFrame = m_hReferenceBuffer;
    executeIn.disableTemporalHints = static_cast<NV_OF_BOOL>(m_enableTemporalHints) == NV_OF_TRUE ?
        NV_OF_FALSE : NV_OF_TRUE;
    executeIn.externalHints = hintsActive ? m_hHintBuffer : nullptr;
    executeIn.numRois = roiActive ? m_roiDataRect.size() : 0;
    executeIn.roiData = roiActive ? m_roiData : nullptr;

    NV_OF_EXECUTE_OUTPUT_PARAMS executeOut;
    memset(&executeOut, 0, sizeof(executeOut));
    executeOut.outputBuffer = m_hOutputBuffer;
    executeOut.outputCostBuffer = costActive ? m_hCostBuffer : nullptr;

    NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &executeIn, &executeOut));

    // Choose which buffer is handed back: the raw hardware output, or the
    // up-scaled copy when the requested grid differs from the hardware grid.
    GpuMat* flowResult = &hwFlowView;
    if (m_scaleFactor > 1)
    {
        const uint32_t srcWidth = hwFlowView.size().width;
        const uint32_t srcHeight = hwFlowView.size().height;
        const uint32_t srcPitch = m_outputBufferStrideInfo.strideInfo[0].strideXInBytes;
        const uint32_t dstWidth = upscaledFlowView.size().width;
        const uint32_t dstHeight = upscaledFlowView.size().height;
        const uint32_t dstPitch = m_outputUpScaledBufferStrideInfo.strideInfo[0].strideXInBytes;

        cv::cuda::device::optflow_nvidia::FlowUpsample(reinterpret_cast<void*>(m_flowXYcuDevPtr),
            srcWidth, srcPitch, srcHeight,
            reinterpret_cast<void*>(m_flowXYUpScaledcuDevPtr),
            dstWidth, dstPitch, dstHeight, m_scaleFactor);

        flowResult = &upscaledFlowView;
    }

    if (_flow.isMat())
    {
        flowResult->download(_flow, m_outputStream);
    }
    else if (_flow.isGpuMat())
    {
        flowResult->copyTo(_flow, m_outputStream);
    }
    else
    {
        CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat");
    }

    // The cost buffer is exposed at hardware-grid resolution as CV_8SC1.
    if (m_enableCostBuffer)
    {
        GpuMat costView(hwFlowSize, CV_8SC1, reinterpret_cast<void*>(m_costcuDevPtr),
            m_costBufferStrideInfo.strideInfo[0].strideXInBytes);

        if (cost.isMat())
        {
            costView.download(cost, m_outputStream);
        }
        else if (cost.isGpuMat())
        {
            costView.copyTo(cost, m_outputStream);
        }
        else
        {
            CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
        }
    }

    m_outputStream.waitForCompletion();
}
|
|
|
void NvidiaOpticalFlowImpl_2::collectGarbage() |
|
{ |
|
if (m_enableROI) |
|
{ |
|
m_roiData = nullptr; |
|
} |
|
if (m_hInputBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hInputBuffer)); |
|
m_hInputBuffer = nullptr; |
|
} |
|
if (m_hReferenceBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hReferenceBuffer)); |
|
m_hReferenceBuffer = nullptr; |
|
} |
|
if (m_hOutputBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputBuffer)); |
|
m_hOutputBuffer = nullptr; |
|
} |
|
if (m_scaleFactor > 1 && m_hOutputUpScaledBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hOutputUpScaledBuffer)); |
|
m_hOutputUpScaledBuffer = nullptr; |
|
} |
|
if (m_enableExternalHints) |
|
{ |
|
if (m_hHintBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hHintBuffer)); |
|
m_hHintBuffer = nullptr; |
|
} |
|
} |
|
if (m_enableCostBuffer) |
|
{ |
|
if (m_hCostBuffer) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer)); |
|
m_hCostBuffer = nullptr; |
|
} |
|
} |
|
if (m_inputStream) |
|
{ |
|
m_inputStream.waitForCompletion(); |
|
} |
|
if (m_outputStream) |
|
{ |
|
m_outputStream.waitForCompletion(); |
|
} |
|
if (m_hOF) |
|
{ |
|
NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF)); |
|
m_hOF = nullptr; |
|
} |
|
} |
|
|
|
// Destructor: releases all GPU buffers and the optical-flow handle by
// delegating to collectGarbage().
//
// collectGarbage() performs driver calls wrapped in NVOF_API_CALL and waits on
// CUDA streams. NOTE(review): NVOF_API_CALL is defined outside this part of the
// file; it presumably reports failures by throwing (as CV_Error does). A
// destructor is implicitly noexcept, so an exception leaving it would call
// std::terminate. Cleanup is therefore best-effort here: a failure during
// teardown is swallowed instead of aborting the process. Callers that need to
// observe release errors can call collectGarbage() explicitly beforehand.
NvidiaOpticalFlowImpl_2::~NvidiaOpticalFlowImpl_2()
{
    try
    {
        collectGarbage();
    }
    catch (...)
    {
        // Intentionally ignored: nothing useful can be done with a teardown
        // failure at this point, and it must not escape the destructor.
    }
}
|
|
|
// Converts a 16-bit flow field into floating point.
//
// _flow     : flow to convert, Mat or GpuMat (a GpuMat is downloaded to the host
//             first). A non-empty input must be CV_16SC2.
// floatFlow : receives a CV_32FC2 array of the same size, Mat or GpuMat, in which
//             every component equals the input component divided by 32 (1 << 5).
//             NOTE(review): presumably the hardware's fixed-point format carries
//             5 fractional bits - confirm against the SDK documentation.
//
// Reports Error::StsBadArg through CV_Error when either argument is neither a
// Mat nor a GpuMat, and fails a CV_Assert when a non-empty input is not CV_16SC2.
void NvidiaOpticalFlowImpl_2::convertToFloat(InputArray _flow, InputOutputArray floatFlow)
{
    Mat flow;
    if (_flow.isMat())
    {
        // Only read below, so sharing the caller's data avoids a full copy.
        flow = _flow.getMat();
    }
    else if (_flow.isGpuMat())
    {
        _flow.getGpuMat().download(flow);
    }
    else
    {
        CV_Error(Error::StsBadArg,
            "Incorrect flow buffer passed. Pass either Mat or GpuMat");
    }

    // The result lives in its own matrix, so floatFlow may alias _flow.
    Mat output(flow.size(), CV_32FC2);
    if (!flow.empty())
    {
        // The conversion is defined for two int16 components per pixel only;
        // reject anything else instead of reinterpreting foreign memory.
        CV_Assert(flow.type() == CV_16SC2);

        // 1/32 is a power of two, so scaling by it is exact and matches a
        // per-component division by 32.
        flow.convertTo(output, CV_32F, 1.0 / (1 << 5));
    }

    if (floatFlow.isMat())
    {
        output.copyTo(floatFlow);
    }
    else if (floatFlow.isGpuMat())
    {
        GpuMat _output(output);
        _output.copyTo(floatFlow);
    }
    else
    {
        CV_Error(Error::StsBadArg,
            "Incorrect flow buffer passed for upsampled flow. Pass either Mat or GpuMat");
    }
}} // the second brace closes a scope opened earlier in the file
|
|
|
// Factory for the 1.0 optical-flow interface.
//
// Converts the public performance preset to the SDK enum and forwards every
// argument, in order, to the NvidiaOpticalFlowImpl constructor.
Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
    cv::Size imageSize, NVIDIA_OF_PERF_LEVEL perfPreset,
    bool bEnableTemporalHints, bool bEnableExternalHints,
    bool bEnableCostBuffer, int gpuId,
    Stream& inputStream, Stream& outputStream)
{
    const NV_OF_PERF_LEVEL sdkPreset = static_cast<NV_OF_PERF_LEVEL>(perfPreset);

    return makePtr<NvidiaOpticalFlowImpl>(
        imageSize, sdkPreset,
        bEnableTemporalHints, bEnableExternalHints, bEnableCostBuffer,
        gpuId, inputStream, outputStream);
}
|
|
|
// Factory for the 2.0 optical-flow interface without regions of interest.
//
// Converts the public enums to their SDK counterparts and builds an
// NvidiaOpticalFlowImpl_2 with ROI processing switched off and an empty
// ROI list.
Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
    cv::Size imageSize, NVIDIA_OF_PERF_LEVEL perfPreset,
    NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize,
    bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer,
    int gpuId, Stream& inputStream, Stream& outputStream)
{
    const NV_OF_PERF_LEVEL sdkPreset = static_cast<NV_OF_PERF_LEVEL>(perfPreset);
    const NV_OF_OUTPUT_VECTOR_GRID_SIZE sdkOutputGrid =
        static_cast<NV_OF_OUTPUT_VECTOR_GRID_SIZE>(outputGridSize);
    const NV_OF_HINT_VECTOR_GRID_SIZE sdkHintGrid =
        static_cast<NV_OF_HINT_VECTOR_GRID_SIZE>(hintGridSize);

    const bool useRoi = false;
    const std::vector<Rect> noRegions;

    return makePtr<NvidiaOpticalFlowImpl_2>(
        imageSize, sdkPreset, sdkOutputGrid, sdkHintGrid,
        useRoi, noRegions,
        bEnableTemporalHints, bEnableExternalHints, bEnableCostBuffer,
        gpuId, inputStream, outputStream);
}
|
|
|
// Factory for the 2.0 optical-flow interface with regions of interest.
//
// Converts the public enums to their SDK counterparts and builds an
// NvidiaOpticalFlowImpl_2 with ROI processing switched on, handing over the
// caller's rectangle list unchanged (ROI is enabled even if the list is empty).
Ptr<cv::cuda::NvidiaOpticalFlow_2_0> cv::cuda::NvidiaOpticalFlow_2_0::create(
    cv::Size imageSize, std::vector<Rect> roiData, NVIDIA_OF_PERF_LEVEL perfPreset,
    NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize, NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize,
    bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer,
    int gpuId, Stream& inputStream, Stream& outputStream)
{
    const NV_OF_PERF_LEVEL sdkPreset = static_cast<NV_OF_PERF_LEVEL>(perfPreset);
    const NV_OF_OUTPUT_VECTOR_GRID_SIZE sdkOutputGrid =
        static_cast<NV_OF_OUTPUT_VECTOR_GRID_SIZE>(outputGridSize);
    const NV_OF_HINT_VECTOR_GRID_SIZE sdkHintGrid =
        static_cast<NV_OF_HINT_VECTOR_GRID_SIZE>(hintGridSize);

    const bool useRoi = true;

    return makePtr<NvidiaOpticalFlowImpl_2>(
        imageSize, sdkPreset, sdkOutputGrid, sdkHintGrid,
        useRoi, roiData,
        bEnableTemporalHints, bEnableExternalHints, bEnableCostBuffer,
        gpuId, inputStream, outputStream);
}
|
#endif
|
|
|