From 881440c6c64ae70487002a816143bc2242934339 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov <2536374+asmorkalov@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:07:06 +0300 Subject: [PATCH] Merge pull request #26143 from asmorkalov:as/HAL_opticalFlowLK Added HAL interface for Lukas-Kanade optical flow #26143 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake --- 3rdparty/carotene/hal/tegra_hal.hpp | 30 ++++++ .../carotene/include/carotene/functions.hpp | 2 +- 3rdparty/carotene/src/opticalflow.cpp | 53 +++------ modules/video/src/hal_replacement.hpp | 101 ++++++++++++++++++ modules/video/src/lkpyramid.cpp | 40 +++++-- 5 files changed, 173 insertions(+), 53 deletions(-) create mode 100644 modules/video/src/hal_replacement.hpp diff --git a/3rdparty/carotene/hal/tegra_hal.hpp b/3rdparty/carotene/hal/tegra_hal.hpp index cb658e8af0..31182a029a 100644 --- a/3rdparty/carotene/hal/tegra_hal.hpp +++ b/3rdparty/carotene/hal/tegra_hal.hpp @@ -1932,4 +1932,34 @@ inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uc #endif // OPENCV_IMGPROC_HAL_INTERFACE_H +// The optimized branch was developed for old armv7 processors +#if defined(__ARM_ARCH) && (__ARM_ARCH == 7) +inline int TEGRA_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_step, + const short* prev_deriv_data, size_t prev_deriv_step, + const 
uchar* next_data, size_t next_step, + int width, int height, int cn, + const float *prev_points, float *next_points, size_t point_count, + uchar *status, float *err, + const int win_width, const int win_height, + int termination_count, double termination_epsilon, + bool get_min_eigen_vals, + float min_eigen_vals_threshold) +{ + if (!CAROTENE_NS::isSupportedConfiguration()) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + CAROTENE_NS::pyrLKOptFlowLevel(CAROTENE_NS::Size2D(width, height), cn, + prev_data, prev_data_step, prev_deriv_data, prev_deriv_step, + next_data, next_step, + point_count, prev_points, next_points, + status, err, CAROTENE_NS::Size2D(win_width, win_height), + termination_count, termination_epsilon, + get_min_eigen_vals, min_eigen_vals_threshold); + return CV_HAL_ERROR_OK; +} + +#undef cv_hal_LKOpticalFlowLevel +#define cv_hal_LKOpticalFlowLevel TEGRA_LKOpticalFlowLevel +#endif // __ARM_ARCH=7 + #endif diff --git a/3rdparty/carotene/include/carotene/functions.hpp b/3rdparty/carotene/include/carotene/functions.hpp index 76d1328194..8a4fa3efdd 100644 --- a/3rdparty/carotene/include/carotene/functions.hpp +++ b/3rdparty/carotene/include/carotene/functions.hpp @@ -2485,7 +2485,7 @@ namespace CAROTENE_NS { u8 *status, f32 *err, const Size2D &winSize, u32 terminationCount, f64 terminationEpsilon, - u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals, + bool getMinEigenVals, f32 minEigThreshold); } diff --git a/3rdparty/carotene/src/opticalflow.cpp b/3rdparty/carotene/src/opticalflow.cpp index 7b29742c84..463ba77fa0 100644 --- a/3rdparty/carotene/src/opticalflow.cpp +++ b/3rdparty/carotene/src/opticalflow.cpp @@ -58,7 +58,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn, u8 *status, f32 *err, const Size2D &winSize, u32 terminationCount, f64 terminationEpsilon, - u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals, + bool getMinEigenVals, f32 minEigThreshold) { internal::assertSupportedConfiguration(); @@ -74,32 +74,11 @@ void 
pyrLKOptFlowLevel(const Size2D &size, s32 cn, for( u32 ptidx = 0; ptidx < ptCount; ptidx++ ) { - f32 levscale = (1./(1 << level)); u32 ptref = ptidx << 1; - f32 prevPtX = prevPts[ptref+0]*levscale; - f32 prevPtY = prevPts[ptref+1]*levscale; - f32 nextPtX; - f32 nextPtY; - if( level == maxLevel ) - { - if( useInitialFlow ) - { - nextPtX = nextPts[ptref+0]*levscale; - nextPtY = nextPts[ptref+1]*levscale; - } - else - { - nextPtX = prevPtX; - nextPtY = prevPtY; - } - } - else - { - nextPtX = nextPts[ptref+0]*2.f; - nextPtY = nextPts[ptref+1]*2.f; - } - nextPts[ptref+0] = nextPtX; - nextPts[ptref+1] = nextPtY; + f32 prevPtX = prevPts[ptref+0]; + f32 prevPtY = prevPts[ptref+1]; + f32 nextPtX = nextPts[ptref+0]; + f32 nextPtY = nextPts[ptref+1]; s32 iprevPtX, iprevPtY; s32 inextPtX, inextPtY; @@ -111,13 +90,10 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn, if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width || iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height ) { - if( level == 0 ) - { - if( status ) - status[ptidx] = false; - if( err ) - err[ptidx] = 0; - } + if( status ) + status[ptidx] = false; + if( err ) + err[ptidx] = 0; continue; } @@ -333,7 +309,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn, if( minEig < minEigThreshold || D < FLT_EPSILON ) { - if( level == 0 && status ) + if( status ) status[ptidx] = false; continue; } @@ -353,7 +329,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn, if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width || inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height ) { - if( level == 0 && status ) + if( status ) status[ptidx] = false; break; } @@ -469,8 +445,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn, prevDeltaX = deltaX; prevDeltaY = deltaY; } - - if( status && status[ptidx] && err && level == 0 && !getMinEigenVals ) + if( status && status[ptidx] && err && !getMinEigenVals ) { f32 nextPointX = nextPts[ptref+0] - halfWinX; f32 nextPointY = 
nextPts[ptref+1] - halfWinY; @@ -526,9 +501,6 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn, (void)winSize; (void)terminationCount; (void)terminationEpsilon; - (void)level; - (void)maxLevel; - (void)useInitialFlow; (void)getMinEigenVals; (void)minEigThreshold; (void)ptCount; @@ -536,4 +508,3 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn, } }//CAROTENE_NS - diff --git a/modules/video/src/hal_replacement.hpp b/modules/video/src/hal_replacement.hpp new file mode 100644 index 0000000000..8d10ab39d1 --- /dev/null +++ b/modules/video/src/hal_replacement.hpp @@ -0,0 +1,101 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_VIDEO_HAL_REPLACEMENT_HPP +#define OPENCV_VIDEO_HAL_REPLACEMENT_HPP + +#include "opencv2/core/hal/interface.h" + +#if defined(__clang__) // clang or MSVC clang +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-parameter" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4100) +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +//! @addtogroup video_hal_interface +//! @note Define your functions to override default implementations: +//! @code +//! #undef cv_hal_LKOpticalFlowLevel +//! #define cv_hal_LKOpticalFlowLevel my_hal_LKOpticalFlowLevel +//! @endcode +//! @{ + +/** +@brief Lucas-Kanade optical flow for single pyramid layer. 
See calcOpticalFlowPyrLK +@param prev_data previous frame image data +@param prev_data_step previous frame image data step +@param prev_deriv_data previous frame Scharr derivatives +@param prev_deriv_step previous frame Scharr derivatives step +@param next_data next frame image data +@param next_step next frame image step +@param width input images width +@param height input images height +@param cn source image channels +@param prev_points 2d points coordinates (x,y) on the previous frame +@param next_points points coordinates (x,y) on the next frame +@param point_count number of input points +@param status optical flow status for each point. Optional output, written only if a non-null pointer is provided +@param err optical flow estimation error for each point. Optional output, written only if a non-null pointer is provided +@param win_width optical flow window width +@param win_height optical flow window height +@param termination_count maximum algorithm iterations. 0 means unlimited +@param termination_epsilon maximal allowed algorithm error +@param get_min_eigen_vals return minimal eigenvalues as point errors in err buffer +@param min_eigen_vals_threshold eigenvalues threshold +**/ +inline int hal_ni_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_step, + const short* prev_deriv_data, size_t prev_deriv_step, + const uchar* next_data, size_t next_step, + int width, int height, int cn, + const float *prev_points, float *next_points, size_t point_count, + uchar *status, float *err, + const int win_width, const int win_height, + int termination_count, double termination_epsilon, + bool get_min_eigen_vals, + float min_eigen_vals_threshold) +{ + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + +//! @cond IGNORED +#define cv_hal_LKOpticalFlowLevel hal_ni_LKOpticalFlowLevel +//! @endcond + +//! 
@} + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +#include "custom_hal.hpp" + +//! @cond IGNORED +#define CALL_HAL_RET(name, fun, retval, ...) \ + int res = __CV_EXPAND(fun(__VA_ARGS__, &retval)); \ + if (res == CV_HAL_ERROR_OK) \ + return retval; \ + else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \ + CV_Error_(cv::Error::StsInternal, \ + ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res)); + + +#define CALL_HAL(name, fun, ...) \ + int res = __CV_EXPAND(fun(__VA_ARGS__)); \ + if (res == CV_HAL_ERROR_OK) \ + return; \ + else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \ + CV_Error_(cv::Error::StsInternal, \ + ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res)); +//! @endcond + +#endif diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 6d51c0cf1a..a9917595e9 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -50,6 +50,7 @@ #endif #include "opencv2/core/openvx/ovx_defs.hpp" +#include "hal_replacement.hpp" #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) @@ -184,11 +185,17 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const { CV_INSTRUMENT_REGION(); + const int W_BITS = 14, W_BITS1 = 14; + const float FLT_SCALE = 1.f/(1 << 20); + Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f); const Mat& I = *prevImg; const Mat& J = *nextImg; const Mat& derivI = *prevDeriv; + cv::AutoBuffer prevPtsScaledData(range.end - range.start); + Point2f* prevPtsScaled = prevPtsScaledData.data(); + int j, cn = I.channels(), cn2 = cn*2; cv::AutoBuffer _buf(winSize.area()*(cn + cn2)); int derivDepth = DataType::depth; @@ -210,7 +217,23 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const else nextPt = nextPts[ptidx]*2.f; nextPts[ptidx] = nextPt; + 
prevPtsScaled[ptidx-range.start] = prevPt; + } + CALL_HAL(LKOpticalFlowLevel, cv_hal_LKOpticalFlowLevel, + I.data, I.step, (const short*)derivI.data, derivI.step, J.data, J.step, + I.cols, I.rows, I.channels(), + (float*)prevPtsScaled, (float*)(nextPts+range.start), range.end-range.start, + (level == 0) ? status+range.start: nullptr, + err != nullptr ? err+range.start: nullptr, + winSize.width, winSize.height, criteria.maxCount, criteria.epsilon, + (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0, + (float)minEigThreshold + ); + + for( int ptidx = range.start; ptidx < range.end; ptidx++ ) + { + Point2f prevPt = prevPtsScaled[ptidx-range.start]; Point2i iprevPt, inextPt; prevPt -= halfWin; iprevPt.x = cvFloor(prevPt.x); @@ -221,8 +244,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const { if( level == 0 ) { - if( status ) - status[ptidx] = false; + status[ptidx] = false; if( err ) err[ptidx] = 0; } @@ -231,8 +253,6 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const float a = prevPt.x - iprevPt.x; float b = prevPt.y - iprevPt.y; - const int W_BITS = 14, W_BITS1 = 14; - const float FLT_SCALE = 1.f/(1 << 20); int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS)); int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS)); int iw10 = cvRound((1.f - a)*b*(1 << W_BITS)); @@ -479,14 +499,14 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const if( minEig < minEigThreshold || D < FLT_EPSILON ) { - if( level == 0 && status ) + if(level == 0) status[ptidx] = false; continue; } D = 1.f/D; - nextPt -= halfWin; + Point2f nextPt = nextPts[ptidx] - halfWin; Point2f prevDelta; for( j = 0; j < criteria.maxCount; j++ ) @@ -497,7 +517,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const if( inextPt.x < -winSize.width || inextPt.x >= J.cols || inextPt.y < -winSize.height || inextPt.y >= J.rows ) { - if( level == 0 && status ) + if( level == 0 ) status[ptidx] = false; break; } @@ -680,7 +700,6 @@ void 
cv::detail::LKTrackerInvoker::operator()(const Range& range) const prevDelta = delta; } - CV_Assert(status != NULL); if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 ) { Point2f nextPoint = nextPts[ptidx] - halfWin; @@ -692,8 +711,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols || inextPoint.y < -winSize.height || inextPoint.y >= J.rows ) { - if( status ) - status[ptidx] = false; + status[ptidx] = false; continue; } @@ -1280,7 +1298,7 @@ void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg, Mat statusMat = _status.getMat(), errMat; CV_Assert( statusMat.isContinuous() ); uchar* status = statusMat.ptr(); - float* err = 0; + float* err = nullptr; for( i = 0; i < npoints; i++ ) status[i] = true;