diff --git a/3rdparty/carotene/hal/tegra_hal.hpp b/3rdparty/carotene/hal/tegra_hal.hpp
index cb658e8af0..31182a029a 100644
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@@ -1932,4 +1932,34 @@ inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uc
 
 #endif // OPENCV_IMGPROC_HAL_INTERFACE_H
 
+// The optimized branch was developed for old armv7 processors
+#if defined(__ARM_ARCH) && (__ARM_ARCH == 7)
+inline int TEGRA_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_step,
+                       const short* prev_deriv_data, size_t prev_deriv_step,
+                       const uchar* next_data, size_t next_step,
+                       int width, int height, int cn,
+                       const float *prev_points, float *next_points, size_t point_count,
+                       uchar *status, float *err,
+                       const int win_width, const int win_height,
+                       int termination_count, double termination_epsilon,
+                       bool get_min_eigen_vals,
+                       float min_eigen_vals_threshold)
+{
+    if (!CAROTENE_NS::isSupportedConfiguration())
+        return CV_HAL_ERROR_NOT_IMPLEMENTED;
+
+    CAROTENE_NS::pyrLKOptFlowLevel(CAROTENE_NS::Size2D(width, height), cn,
+        prev_data, prev_data_step, prev_deriv_data, prev_deriv_step,
+        next_data, next_step,
+        point_count, prev_points, next_points,
+        status, err, CAROTENE_NS::Size2D(win_width, win_height),
+        termination_count, termination_epsilon,
+        get_min_eigen_vals, min_eigen_vals_threshold);
+    return CV_HAL_ERROR_OK;
+}
+
+#undef cv_hal_LKOpticalFlowLevel
+#define cv_hal_LKOpticalFlowLevel TEGRA_LKOpticalFlowLevel
+#endif // __ARM_ARCH=7
+
 #endif
diff --git a/3rdparty/carotene/include/carotene/functions.hpp b/3rdparty/carotene/include/carotene/functions.hpp
index 76d1328194..8a4fa3efdd 100644
--- a/3rdparty/carotene/include/carotene/functions.hpp
+++ b/3rdparty/carotene/include/carotene/functions.hpp
@@ -2485,7 +2485,7 @@ namespace CAROTENE_NS {
                            u8 *status, f32 *err,
                            const Size2D &winSize,
                            u32 terminationCount, f64 terminationEpsilon,
-                           u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
+                           bool getMinEigenVals,
                            f32 minEigThreshold);
 }
 
diff --git a/3rdparty/carotene/src/opticalflow.cpp b/3rdparty/carotene/src/opticalflow.cpp
index 7b29742c84..463ba77fa0 100644
--- a/3rdparty/carotene/src/opticalflow.cpp
+++ b/3rdparty/carotene/src/opticalflow.cpp
@@ -58,7 +58,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
                        u8 *status, f32 *err,
                        const Size2D &winSize,
                        u32 terminationCount, f64 terminationEpsilon,
-                       u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
+                       bool getMinEigenVals,
                        f32 minEigThreshold)
 {
     internal::assertSupportedConfiguration();
@@ -74,32 +74,11 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
 
     for( u32 ptidx = 0; ptidx < ptCount; ptidx++ )
     {
-        f32 levscale = (1./(1 << level));
         u32 ptref = ptidx << 1;
-        f32 prevPtX = prevPts[ptref+0]*levscale;
-        f32 prevPtY = prevPts[ptref+1]*levscale;
-        f32 nextPtX;
-        f32 nextPtY;
-        if( level == maxLevel )
-        {
-            if( useInitialFlow )
-            {
-                nextPtX = nextPts[ptref+0]*levscale;
-                nextPtY = nextPts[ptref+1]*levscale;
-            }
-            else
-            {
-                nextPtX = prevPtX;
-                nextPtY = prevPtY;
-            }
-        }
-        else
-        {
-            nextPtX = nextPts[ptref+0]*2.f;
-            nextPtY = nextPts[ptref+1]*2.f;
-        }
-        nextPts[ptref+0] = nextPtX;
-        nextPts[ptref+1] = nextPtY;
+        f32 prevPtX = prevPts[ptref+0];
+        f32 prevPtY = prevPts[ptref+1];
+        f32 nextPtX = nextPts[ptref+0];
+        f32 nextPtY = nextPts[ptref+1];
 
         s32 iprevPtX, iprevPtY;
         s32 inextPtX, inextPtY;
@@ -111,13 +90,10 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
         if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width ||
             iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height )
         {
-            if( level == 0 )
-            {
-                if( status )
-                    status[ptidx] = false;
-                if( err )
-                    err[ptidx] = 0;
-            }
+            if( status )
+                status[ptidx] = false;
+            if( err )
+                err[ptidx] = 0;
             continue;
         }
 
@@ -333,7 +309,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
 
         if( minEig < minEigThreshold || D < FLT_EPSILON )
         {
-            if( level == 0 && status )
+            if( status )
                 status[ptidx] = false;
             continue;
         }
@@ -353,7 +329,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
             if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width ||
                inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height )
             {
-                if( level == 0 && status )
+                if( status )
                     status[ptidx] = false;
                 break;
             }
@@ -469,8 +445,7 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
             prevDeltaX = deltaX;
             prevDeltaY = deltaY;
         }
-
-        if( status && status[ptidx] && err && level == 0 && !getMinEigenVals )
+        if( status && status[ptidx] && err && !getMinEigenVals )
         {
             f32 nextPointX = nextPts[ptref+0] - halfWinX;
             f32 nextPointY = nextPts[ptref+1] - halfWinY;
@@ -526,9 +501,6 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
     (void)winSize;
     (void)terminationCount;
     (void)terminationEpsilon;
-    (void)level;
-    (void)maxLevel;
-    (void)useInitialFlow;
     (void)getMinEigenVals;
     (void)minEigThreshold;
     (void)ptCount;
@@ -536,4 +508,3 @@ void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
 }
 
 }//CAROTENE_NS
-
diff --git a/modules/video/src/hal_replacement.hpp b/modules/video/src/hal_replacement.hpp
new file mode 100644
index 0000000000..8d10ab39d1
--- /dev/null
+++ b/modules/video/src/hal_replacement.hpp
@@ -0,0 +1,101 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_VIDEO_HAL_REPLACEMENT_HPP
+#define OPENCV_VIDEO_HAL_REPLACEMENT_HPP
+
+#include "opencv2/core/hal/interface.h"
+
+#if defined(__clang__)  // clang or MSVC clang
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#elif defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+//! @addtogroup video_hal_interface
+//! @note Define your functions to override default implementations:
+//! @code
+//! #undef cv_hal_LKOpticalFlowLevel
+//! #define cv_hal_LKOpticalFlowLevel my_hal_LKOpticalFlowLevel
+//! @endcode
+//! @{
+
+/**
+@brief Lucas-Kanade optical flow for a single pyramid layer. See calcOpticalFlowPyrLK
+@param prev_data previous frame image data
+@param prev_data_step previous frame image data step
+@param prev_deriv_data previous frame Scharr derivatives
+@param prev_deriv_step previous frame Scharr derivatives step
+@param next_data next frame image data
+@param next_step next frame image step
+@param width input images width
+@param height input images height
+@param cn source image channels
+@param prev_points 2d points coordinates (x,y) on the previous frame
+@param next_points points coordinates (x,y) on the next frame
+@param point_count number of input points
+@param status optical flow status for each point. Optional output, expected to be filled if a non-null pointer is provided
+@param err optical flow estimation error for each point. Optional output, expected to be filled if a non-null pointer is provided
+@param win_width optical flow window width
+@param win_height optical flow window height
+@param termination_count maximum algorithm iterations. 0 means unlimited
+@param termination_epsilon maximal allowed algorithm error
+@param get_min_eigen_vals return minimal eigen values as point errors in err buffer
+@param min_eigen_vals_threshold eigen values threshold
+**/
+inline int hal_ni_LKOpticalFlowLevel(const uchar *prev_data, size_t prev_data_step,
+                       const short* prev_deriv_data, size_t prev_deriv_step,
+                       const uchar* next_data, size_t next_step,
+                       int width, int height, int cn,
+                       const float *prev_points, float *next_points, size_t point_count,
+                       uchar *status, float *err,
+                       const int win_width, const int win_height,
+                       int termination_count, double termination_epsilon,
+                       bool get_min_eigen_vals,
+                       float min_eigen_vals_threshold)
+{
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+//! @cond IGNORED
+#define cv_hal_LKOpticalFlowLevel hal_ni_LKOpticalFlowLevel
+//! @endcond
+
+//! @}
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(_MSC_VER)
+#pragma warning(pop)
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
+#include "custom_hal.hpp"
+
+//! @cond IGNORED
+#define CALL_HAL_RET(name, fun, retval, ...) \
+    int res = __CV_EXPAND(fun(__VA_ARGS__, &retval)); \
+    if (res == CV_HAL_ERROR_OK) \
+        return retval; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+
+
+#define CALL_HAL(name, fun, ...) \
+    int res = __CV_EXPAND(fun(__VA_ARGS__)); \
+    if (res == CV_HAL_ERROR_OK) \
+        return; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+//! @endcond
+
+#endif
diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp
index 6d51c0cf1a..a9917595e9 100644
--- a/modules/video/src/lkpyramid.cpp
+++ b/modules/video/src/lkpyramid.cpp
@@ -50,6 +50,7 @@
 #endif
 
 #include "opencv2/core/openvx/ovx_defs.hpp"
+#include "hal_replacement.hpp"
 
 #define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))
 
@@ -184,11 +185,17 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 {
     CV_INSTRUMENT_REGION();
 
+    const int W_BITS = 14, W_BITS1 = 14;
+    const float FLT_SCALE = 1.f/(1 << 20);
+
     Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
     const Mat& I = *prevImg;
     const Mat& J = *nextImg;
     const Mat& derivI = *prevDeriv;
 
+    cv::AutoBuffer<Point2f> prevPtsScaledData(range.end - range.start);
+    Point2f* prevPtsScaled = prevPtsScaledData.data();
+
     int j, cn = I.channels(), cn2 = cn*2;
     cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
     int derivDepth = DataType<deriv_type>::depth;
@@ -210,7 +217,23 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
         else
             nextPt = nextPts[ptidx]*2.f;
         nextPts[ptidx] = nextPt;
+        prevPtsScaled[ptidx-range.start] = prevPt;
+    }
 
+    CALL_HAL(LKOpticalFlowLevel, cv_hal_LKOpticalFlowLevel,
+        I.data, I.step, (const short*)derivI.data, derivI.step, J.data, J.step,
+        I.cols, I.rows, I.channels(),
+        (float*)prevPtsScaled, (float*)(nextPts+range.start), range.end-range.start,
+        (level == 0) ? status+range.start: nullptr,
+        err != nullptr ? err+range.start: nullptr,
+        winSize.width, winSize.height, criteria.maxCount, criteria.epsilon,
+        (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0,
+        (float)minEigThreshold
+    );
+
+    for( int ptidx = range.start; ptidx < range.end; ptidx++ )
+    {
+        Point2f prevPt = prevPtsScaled[ptidx-range.start];
         Point2i iprevPt, inextPt;
         prevPt -= halfWin;
         iprevPt.x = cvFloor(prevPt.x);
@@ -221,8 +244,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
         {
             if( level == 0 )
             {
-                if( status )
-                    status[ptidx] = false;
+                status[ptidx] = false;
                 if( err )
                     err[ptidx] = 0;
             }
@@ -231,8 +253,6 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 
         float a = prevPt.x - iprevPt.x;
         float b = prevPt.y - iprevPt.y;
-        const int W_BITS = 14, W_BITS1 = 14;
-        const float FLT_SCALE = 1.f/(1 << 20);
         int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
         int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
         int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
@@ -479,14 +499,14 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
 
         if( minEig < minEigThreshold || D < FLT_EPSILON )
         {
-            if( level == 0 && status )
+            if(level == 0)
                 status[ptidx] = false;
             continue;
         }
 
         D = 1.f/D;
 
-        nextPt -= halfWin;
+        Point2f nextPt = nextPts[ptidx] - halfWin;
         Point2f prevDelta;
 
         for( j = 0; j < criteria.maxCount; j++ )
@@ -497,7 +517,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
             if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
                inextPt.y < -winSize.height || inextPt.y >= J.rows )
             {
-                if( level == 0 && status )
+                if( level == 0 )
                     status[ptidx] = false;
                 break;
             }
@@ -680,7 +700,6 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
             prevDelta = delta;
         }
 
-        CV_Assert(status != NULL);
         if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
         {
             Point2f nextPoint = nextPts[ptidx] - halfWin;
@@ -692,8 +711,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
             if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
                 inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
             {
-                if( status )
-                    status[ptidx] = false;
+                status[ptidx] = false;
                 continue;
             }
 
@@ -1280,7 +1298,7 @@ void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg,
     Mat statusMat = _status.getMat(), errMat;
     CV_Assert( statusMat.isContinuous() );
     uchar* status = statusMat.ptr();
-    float* err = 0;
+    float* err = nullptr;
 
     for( i = 0; i < npoints; i++ )
         status[i] = true;