Merge pull request #26143 from asmorkalov:as/HAL_opticalFlowLK

Added HAL interface for Lukas-Kanade optical flow #26143 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2 months ago · 881440c6c6
parent ee685017c3
commit 881440c6c6
5 changed files with 173 additions and 53 deletions
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@ -1932,4 +1932,34 @@ inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uc

 #endif // OPENCV_IMGPROC_HAL_INTERFACE_H

+// The optimized branch was developed for old armv7 processors
+#if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
+inline int TEGRA_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_step,
+                       const short* prev_deriv_data, size_t prev_deriv_step,
+                       const uchar* next_data, size_t next_step,
+                       int width, int height, int cn,
+                       const float *prev_points, float *next_points, size_t point_count,
+                       uchar *status, float *err,
+                       const int win_width, const int win_height,
+                       int termination_count, double termination_epsilon,
+                       bool get_min_eigen_vals,
+                       float min_eigen_vals_threshold)
+{
+    if (!CAROTENE_NS::isSupportedConfiguration())
+        return CV_HAL_ERROR_NOT_IMPLEMENTED;
+
+    CAROTENE_NS::pyrLKOptFlowLevel(CAROTENE_NS::Size2D(width, height), cn,
+        prev_data, prev_data_step, prev_deriv_data, prev_deriv_step,
+        next_data, next_step,
+        point_count, prev_points, next_points,
+        status, err, CAROTENE_NS::Size2D(win_width, win_height),
+        termination_count, termination_epsilon,
+        get_min_eigen_vals, min_eigen_vals_threshold);
+    return CV_HAL_ERROR_OK;
+}
+
+#undef cv_hal_LKOpticalFlowLevel
+#define cv_hal_LKOpticalFlowLevel TEGRA_LKOpticalFlowLevel
+#endif // __ARM_ARCH=7
+
 #endif
--- a/3rdparty/carotene/include/carotene/functions.hpp
+++ b/3rdparty/carotene/include/carotene/functions.hpp
@ -2485,7 +2485,7 @@ namespace CAROTENE_NS {
                           u8 *status, f32 *err,
                           const Size2D &winSize,
                           u32 terminationCount, f64 terminationEpsilon,
-                           u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
+                           bool getMinEigenVals,
                           f32 minEigThreshold);
 }

--- a/3rdparty/carotene/src/opticalflow.cpp
+++ b/3rdparty/carotene/src/opticalflow.cpp
@ -58,7 +58,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
                       u8 *status, f32 *err,
                       const Size2D &winSize,
                       u32 terminationCount, f64 terminationEpsilon,
-                       u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
+                       bool getMinEigenVals,
                       f32 minEigThreshold)
 {
    internal::assertSupportedConfiguration();
@ -74,32 +74,11 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,

    for( u32 ptidx = 0; ptidx < ptCount; ptidx++ )
    {
-        f32 levscale = (1./(1 << level));
        u32 ptref = ptidx << 1;
-        f32 prevPtX = prevPts[ptref+0]*levscale;
-        f32 prevPtY = prevPts[ptref+1]*levscale;
-        f32 nextPtX;
-        f32 nextPtY;
-        if( level == maxLevel )
-        {
-            if( useInitialFlow )
-            {
-                nextPtX = nextPts[ptref+0]*levscale;
-                nextPtY = nextPts[ptref+1]*levscale;
-            }
-            else
-            {
-                nextPtX = prevPtX;
-                nextPtY = prevPtY;
-            }
-        }
-        else
-        {
-            nextPtX = nextPts[ptref+0]*2.f;
-            nextPtY = nextPts[ptref+1]*2.f;
-        }
-        nextPts[ptref+0] = nextPtX;
-        nextPts[ptref+1] = nextPtY;
+        f32 prevPtX = prevPts[ptref+0];
+        f32 prevPtY = prevPts[ptref+1];
+        f32 nextPtX = nextPts[ptref+0];
+        f32 nextPtY = nextPts[ptref+1];

        s32 iprevPtX, iprevPtY;
        s32 inextPtX, inextPtY;
@ -111,13 +90,10 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
        if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width ||
            iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height )
        {
-            if( level == 0 )
-            {
-                if( status )
-                    status[ptidx] = false;
-                if( err )
-                    err[ptidx] = 0;
-            }
+            if( status )
+                status[ptidx] = false;
+            if( err )
+                err[ptidx] = 0;
            continue;
        }

@ -333,7 +309,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,

        if( minEig < minEigThreshold || D < FLT_EPSILON )
        {
-            if( level == 0 && status )
+            if( status )
                status[ptidx] = false;
            continue;
        }
@ -353,7 +329,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
            if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width ||
               inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height )
            {
-                if( level == 0 && status )
+                if( status )
                    status[ptidx] = false;
                break;
            }
@ -469,8 +445,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
            prevDeltaX = deltaX;
            prevDeltaY = deltaY;
        }
-
-        if( status && status[ptidx] && err && level == 0 && !getMinEigenVals )
+        if( status && status[ptidx] && err && !getMinEigenVals )
        {
            f32 nextPointX = nextPts[ptref+0] - halfWinX;
            f32 nextPointY = nextPts[ptref+1] - halfWinY;
@ -526,9 +501,6 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
    (void)winSize;
    (void)terminationCount;
    (void)terminationEpsilon;
-    (void)level;
-    (void)maxLevel;
-    (void)useInitialFlow;
    (void)getMinEigenVals;
    (void)minEigThreshold;
    (void)ptCount;
@ -536,4 +508,3 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
 }

 }//CAROTENE_NS
-
--- a/modules/video/src/hal_replacement.hpp
+++ b/modules/video/src/hal_replacement.hpp
@ -0,0 +1,101 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_VIDEO_HAL_REPLACEMENT_HPP
+#define OPENCV_VIDEO_HAL_REPLACEMENT_HPP
+
+#include "opencv2/core/hal/interface.h"
+
+#if defined(__clang__)  // clang or MSVC clang
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#elif defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+//! @addtogroup video_hal_interface
+//! @note Define your functions to override default implementations:
+//! @code
+//! #undef cv_hal_LK_optical_flow_level
+//! #define cv_hal_LK_optical_flow_level my_hal_LK_optical_flow_level
+//! @endcode
+//! @{
+
+/**
+@brief Lucas-Kanade optical flow for single pyramid layer. See calcOpticalFlowPyrLK
+@param prev_data previous frame image data
+@param prev_data_step previous frame image data step
+@param prev_deriv_data previous frame Schaar derivatives
+@param prev_deriv_step previous frame Schaar derivatives step
+@param next_data next frame image data
+@param next_step next frame image step
+@param width input images width
+@param height input images height
+@param cn source image channels
+@param prev_points 2d points coordinates (x,y) on the previous frame
+@param next_points points coordinates (x,y) on the next frame
+@param point_count - amount of input points
+@param status optical flow status for each point. Optional output, expected if not nullptr is provided
+@param err optical flow estimation error for each point. Optional output, expected if not nullptr is provided
+@param win_width optical flow window width
+@param win_height optical flow window heigh
+@param termination_count maximum algorithm iterations. 0 means unlimited
+@param termination_epsilon maximal allowed algorithm error
+@param get_min_eigen_vals return minimal egen values as point errors in err buffer
+@param min_eigen_vals_threshold eigen values threshold
+**/
+inline int hal_ni_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_step,
+                       const short* prev_deriv_data, size_t prev_deriv_step,
+                       const uchar* next_data, size_t next_step,
+                       int width, int height, int cn,
+                       const float *prev_points, float *next_points, size_t point_count,
+                       uchar *status, float *err,
+                       const int win_width, const int win_height,
+                       int termination_count, double termination_epsilon,
+                       bool get_min_eigen_vals,
+                       float min_eigen_vals_threshold)
+{
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+//! @cond IGNORED
+#define cv_hal_LKOpticalFlowLevel hal_ni_LKOpticalFlowLevel
+//! @endcond
+
+//! @}
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(_MSC_VER)
+#pragma warning(pop)
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
+#include "custom_hal.hpp"
+
+//! @cond IGNORED
+#define CALL_HAL_RET(name, fun, retval, ...) \
+    int res = __CV_EXPAND(fun(__VA_ARGS__, &retval)); \
+    if (res == CV_HAL_ERROR_OK) \
+        return retval; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+
+
+#define CALL_HAL(name, fun, ...) \
+    int res = __CV_EXPAND(fun(__VA_ARGS__)); \
+    if (res == CV_HAL_ERROR_OK) \
+        return; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+//! @endcond
+
+#endif
--- a/modules/video/src/lkpyramid.cpp
+++ b/modules/video/src/lkpyramid.cpp
@ -50,6 +50,7 @@
 #endif

 #include "opencv2/core/openvx/ovx_defs.hpp"
+#include "hal_replacement.hpp"

 #define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))

@ -184,11 +185,17 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 {
    CV_INSTRUMENT_REGION();

+    const int W_BITS = 14, W_BITS1 = 14;
+    const float FLT_SCALE = 1.f/(1 << 20);
+
    Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
    const Mat& I = *prevImg;
    const Mat& J = *nextImg;
    const Mat& derivI = *prevDeriv;

+    cv::AutoBuffer<Point2f> prevPtsScaledData(range.end - range.start);
+    Point2f* prevPtsScaled = prevPtsScaledData.data();
+
    int j, cn = I.channels(), cn2 = cn*2;
    cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
    int derivDepth = DataType<deriv_type>::depth;
@ -210,7 +217,23 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
        else
            nextPt = nextPts[ptidx]*2.f;
        nextPts[ptidx] = nextPt;
+        prevPtsScaled[ptidx-range.start] = prevPt;
+    }

+    CALL_HAL(LKOpticalFlowLevel, cv_hal_LKOpticalFlowLevel,
+        I.data, I.step, (const short*)derivI.data, derivI.step, J.data, J.step,
+        I.cols, I.rows, I.channels(),
+        (float*)prevPtsScaled, (float*)(nextPts+range.start), range.end-range.start,
+        (level == 0) ? status+range.start: nullptr,
+        err != nullptr ? err+range.start: nullptr,
+        winSize.width, winSize.height, criteria.maxCount, criteria.epsilon,
+        (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0,
+        (float)minEigThreshold
+    );
+
+    for( int ptidx = range.start; ptidx < range.end; ptidx++ )
+    {
+        Point2f prevPt = prevPtsScaled[ptidx-range.start];
        Point2i iprevPt, inextPt;
        prevPt -= halfWin;
        iprevPt.x = cvFloor(prevPt.x);
@ -221,8 +244,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
        {
            if( level == 0 )
            {
-                if( status )
-                    status[ptidx] = false;
+                status[ptidx] = false;
                if( err )
                    err[ptidx] = 0;
            }
@ -231,8 +253,6 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const

        float a = prevPt.x - iprevPt.x;
        float b = prevPt.y - iprevPt.y;
-        const int W_BITS = 14, W_BITS1 = 14;
-        const float FLT_SCALE = 1.f/(1 << 20);
        int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
        int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
        int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
@ -479,14 +499,14 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const

        if( minEig < minEigThreshold || D < FLT_EPSILON )
        {
-            if( level == 0 && status )
+            if(level == 0)
                status[ptidx] = false;
            continue;
        }

        D = 1.f/D;

-        nextPt -= halfWin;
+        Point2f nextPt = nextPts[ptidx] - halfWin;
        Point2f prevDelta;

        for( j = 0; j < criteria.maxCount; j++ )
@ -497,7 +517,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
            if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
               inextPt.y < -winSize.height || inextPt.y >= J.rows )
            {
-                if( level == 0 && status )
+                if( level == 0 )
                    status[ptidx] = false;
                break;
            }
@ -680,7 +700,6 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
            prevDelta = delta;
        }

-        CV_Assert(status != NULL);
        if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
        {
            Point2f nextPoint = nextPts[ptidx] - halfWin;
@ -692,8 +711,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
            if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
                inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
            {
-                if( status )
-                    status[ptidx] = false;
+                status[ptidx] = false;
                continue;
            }

@ -1280,7 +1298,7 @@ void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg,
    Mat statusMat = _status.getMat(), errMat;
    CV_Assert( statusMat.isContinuous() );
    uchar* status = statusMat.ptr();
-    float* err = 0;
+    float* err = nullptr;

    for( i = 0; i < npoints; i++ )
        status[i] = true;