You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
422 lines
16 KiB
422 lines
16 KiB
// This file is part of OpenCV project. |
|
// It is subject to the license terms in the LICENSE file found in the top-level directory |
|
// of this distribution and at http://opencv.org/license.html. |
|
#ifndef _RLOF_INVOKERBASE_HPP_ |
|
#define _RLOF_INVOKERBASE_HPP_ |
|
|
|
#ifndef CV_DESCALE |
|
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) |
|
#endif |
|
#define FLT_RESCALE 1 |
|
|
|
#include "rlof_localflow.h" |
|
#include <unordered_map> |
|
#include "opencv2/core/hal/intrin.hpp" |
|
using namespace std; |
|
using namespace cv; |
|
namespace cv { |
|
namespace optflow { |
|
|
|
|
|
typedef short deriv_type; |
|
#if CV_SIMD128 |
|
|
|
static inline void getVBitMask(const int & width, v_int32x4 & mask0, v_int32x4 & mask1) |
|
{ |
|
int noBits = width - static_cast<int>(floor(width / 8.f) * 8.f); |
|
unsigned int val[8]; |
|
for (int n = 0; n < 8; n++) |
|
{ |
|
val[n] = (noBits > n) ? (std::numeric_limits<unsigned int>::max()) : 0; |
|
} |
|
mask0 = v_int32x4(val[0], val[1], val[2], val[3]); |
|
mask1 = v_int32x4(val[4], val[5], val[6], val[7]); |
|
} |
|
#endif |
|
typedef uchar tMaskType; |
|
#define tCVMaskType CV_8UC1 |
|
#define MaskSet 0xffffffff |
|
|
|
static inline void copyWinBuffers(int iw00, int iw01, int iw10, int iw11, |
|
Size winSize, |
|
const Mat & I, const Mat & derivI, const Mat & winMaskMat, |
|
Mat & IWinBuf, Mat & derivIWinBuf, |
|
Point iprevPt) |
|
{ |
|
int cn = I.channels(), cn2 = cn * 2; |
|
const int W_BITS = 14; |
|
#if CV_SIMD128 |
|
v_int16x8 vqw0((short)(iw00), (short)(iw01), (short)(iw00), (short)(iw01), (short)(iw00), (short)(iw01), (short)(iw00), (short)(iw01)); |
|
v_int16x8 vqw1((short)(iw10), (short)(iw11), (short)(iw10), (short)(iw11), (short)(iw10), (short)(iw11), (short)(iw10), (short)(iw11)); |
|
v_int32x4 vdelta_d = v_setall_s32(1 << (W_BITS - 1)); |
|
v_int32x4 vdelta = v_setall_s32(1 << (W_BITS - 5 - 1)); |
|
v_int32x4 vmax_val_32 = v_setall_s32(std::numeric_limits<unsigned int>::max()); |
|
v_int32x4 vmask_border_0, vmask_border_1; |
|
getVBitMask(winSize.width, vmask_border_0, vmask_border_1); |
|
#endif |
|
|
|
// extract the patch from the first image, compute covariation matrix of derivatives |
|
int x, y; |
|
for (y = 0; y < winSize.height; y++) |
|
{ |
|
const uchar* src = I.ptr<uchar>(y + iprevPt.y, 0) + iprevPt.x*cn; |
|
const uchar* src1 = I.ptr<uchar>(y + iprevPt.y + 1, 0) + iprevPt.x*cn; |
|
const short* dsrc = derivI.ptr<short>(y + iprevPt.y, 0) + iprevPt.x*cn2; |
|
const short* dsrc1 = derivI.ptr<short>(y + iprevPt.y + 1, 0) + iprevPt.x*cn2; |
|
short* Iptr = IWinBuf.ptr<short>(y, 0); |
|
short* dIptr = derivIWinBuf.ptr<short>(y, 0); |
|
const tMaskType* maskPtr = winMaskMat.ptr<tMaskType>(y, 0); |
|
x = 0; |
|
#if CV_SIMD128 |
|
|
|
for (; x <= winSize.width*cn; x += 8, dsrc += 8 * 2, dsrc1 += 8 * 2, dIptr += 8 * 2) |
|
{ |
|
v_int32x4 vmask0 = v_mul(v_reinterpret_as_s32(v_load_expand_q(maskPtr + x)), vmax_val_32); |
|
v_int32x4 vmask1 = v_mul(v_reinterpret_as_s32(v_load_expand_q(maskPtr + x + 4)), vmax_val_32); |
|
if (x + 4 > winSize.width) |
|
{ |
|
vmask0 = v_and(vmask0, vmask_border_0); |
|
} |
|
if (x + 8 > winSize.width) |
|
{ |
|
vmask1 = v_and(vmask1, vmask_border_1); |
|
} |
|
|
|
v_int32x4 t0, t1; |
|
v_int16x8 v00, v01, v10, v11, t00, t01, t10, t11; |
|
v00 = v_reinterpret_as_s16(v_load_expand(src + x)); |
|
v01 = v_reinterpret_as_s16(v_load_expand(src + x + cn)); |
|
v10 = v_reinterpret_as_s16(v_load_expand(src1 + x)); |
|
v11 = v_reinterpret_as_s16(v_load_expand(src1 + x + cn)); |
|
|
|
v_zip(v00, v01, t00, t01); |
|
v_zip(v10, v11, t10, t11); |
|
t0 = v_add(v_dotprod(t00, vqw0, vdelta), v_dotprod(t10, vqw1)); |
|
t1 = v_add(v_dotprod(t01, vqw0, vdelta), v_dotprod(t11, vqw1)); |
|
t0 = v_and(v_shr(t0, W_BITS - 5), vmask0); |
|
t1 = v_and(v_shr(t1, W_BITS - 5), vmask1); |
|
v_store(Iptr + x, v_pack(t0, t1)); |
|
|
|
v00 = v_reinterpret_as_s16(v_load(dsrc)); |
|
v01 = v_reinterpret_as_s16(v_load(dsrc + cn2)); |
|
v10 = v_reinterpret_as_s16(v_load(dsrc1)); |
|
v11 = v_reinterpret_as_s16(v_load(dsrc1 + cn2)); |
|
|
|
v_zip(v00, v01, t00, t01); |
|
v_zip(v10, v11, t10, t11); |
|
|
|
t0 = v_add(v_dotprod(t00, vqw0, vdelta_d), v_dotprod(t10, vqw1)); |
|
t1 = v_add(v_dotprod(t01, vqw0, vdelta_d), v_dotprod(t11, vqw1)); |
|
t0 = v_shr(t0, W_BITS); |
|
t1 = v_shr(t1, W_BITS); |
|
v00 = v_pack(t0, t1); // Ix0 Iy0 Ix1 Iy1 ... |
|
v00 = v_and(v00, v_reinterpret_as_s16(vmask0)); |
|
v_store(dIptr, v00); |
|
|
|
v00 = v_reinterpret_as_s16(v_load(dsrc + 4 * 2)); |
|
v01 = v_reinterpret_as_s16(v_load(dsrc + 4 * 2 + cn2)); |
|
v10 = v_reinterpret_as_s16(v_load(dsrc1 + 4 * 2)); |
|
v11 = v_reinterpret_as_s16(v_load(dsrc1 + 4 * 2 + cn2)); |
|
|
|
v_zip(v00, v01, t00, t01); |
|
v_zip(v10, v11, t10, t11); |
|
|
|
t0 = v_add(v_dotprod(t00, vqw0, vdelta_d), v_dotprod(t10, vqw1)); |
|
t1 = v_add(v_dotprod(t01, vqw0, vdelta_d), v_dotprod(t11, vqw1)); |
|
t0 = v_shr(t0, W_BITS); |
|
t1 = v_shr(t1, W_BITS); |
|
v00 = v_pack(t0, t1); // Ix0 Iy0 Ix1 Iy1 ... |
|
v00 = v_and(v00, v_reinterpret_as_s16(vmask1)); |
|
v_store(dIptr + 4 * 2, v00); |
|
} |
|
#else |
|
for (; x < winSize.width*cn; x++, dsrc += 2, dsrc1 += 2, dIptr += 2) |
|
{ |
|
if (maskPtr[x] == 0) |
|
{ |
|
dIptr[0] = 0; |
|
dIptr[1] = 0; |
|
continue; |
|
} |
|
int ival = CV_DESCALE(src[x] * iw00 + src[x + cn] * iw01 + |
|
src1[x] * iw10 + src1[x + cn] * iw11, W_BITS - 5); |
|
int ixval = CV_DESCALE(dsrc[0] * iw00 + dsrc[cn2] * iw01 + |
|
dsrc1[0] * iw10 + dsrc1[cn2] * iw11, W_BITS); |
|
int iyval = CV_DESCALE(dsrc[1] * iw00 + dsrc[cn2 + 1] * iw01 + dsrc1[1] * iw10 + |
|
dsrc1[cn2 + 1] * iw11, W_BITS); |
|
|
|
Iptr[x] = (short)ival; |
|
dIptr[0] = (short)ixval; |
|
dIptr[1] = (short)iyval; |
|
} |
|
#endif |
|
} |
|
} |
|
|
|
static inline void copyWinBuffers(int iw00, int iw01, int iw10, int iw11, |
|
Size winSize, |
|
const Mat & I, const Mat & derivI, const Mat & winMaskMat, |
|
Mat & IWinBuf, Mat & derivIWinBuf, |
|
float & A11, float & A22, float & A12, |
|
Point iprevPt) |
|
{ |
|
const float FLT_SCALE = (1.f / (1 << 20)); |
|
int cn = I.channels(), cn2 = cn * 2; |
|
const int W_BITS = 14; |
|
#if CV_SIMD128 |
|
v_int16x8 vqw0((short)(iw00), (short)(iw01), (short)(iw00), (short)(iw01), (short)(iw00), (short)(iw01), (short)(iw00), (short)(iw01)); |
|
v_int16x8 vqw1((short)(iw10), (short)(iw11), (short)(iw10), (short)(iw11), (short)(iw10), (short)(iw11), (short)(iw10), (short)(iw11)); |
|
v_int32x4 vdelta_d = v_setall_s32(1 << (W_BITS - 1)); |
|
v_int32x4 vdelta = v_setall_s32(1 << (W_BITS - 5 - 1)); |
|
v_int32x4 vmax_val_32 = v_setall_s32(std::numeric_limits<unsigned int>::max()); |
|
v_int32x4 vmask_border0, vmask_border1; |
|
v_float32x4 vA11 = v_setzero_f32(), vA22 = v_setzero_f32(), vA12 = v_setzero_f32(); |
|
getVBitMask(winSize.width, vmask_border0, vmask_border1); |
|
#endif |
|
|
|
// extract the patch from the first image, compute covariation matrix of derivatives |
|
for (int y = 0; y < winSize.height; y++) |
|
{ |
|
const uchar* src = I.ptr<uchar>(y + iprevPt.y, 0) + iprevPt.x*cn; |
|
const uchar* src1 = I.ptr<uchar>(y + iprevPt.y + 1, 0) + iprevPt.x*cn; |
|
const short* dsrc = derivI.ptr<short>(y + iprevPt.y, 0) + iprevPt.x*cn2; |
|
const short* dsrc1 = derivI.ptr<short>(y + iprevPt.y + 1, 0) + iprevPt.x*cn2; |
|
short* Iptr = IWinBuf.ptr<short>(y, 0); |
|
short* dIptr = derivIWinBuf.ptr<short>(y, 0); |
|
const tMaskType* maskPtr = winMaskMat.ptr<tMaskType>(y, 0); |
|
#if CV_SIMD128 |
|
for (int x = 0; x <= winSize.width*cn; x += 8, dsrc += 8 * 2, dsrc1 += 8 * 2, dIptr += 8 * 2) |
|
{ |
|
v_int32x4 vmask0 = v_mul(v_reinterpret_as_s32(v_load_expand_q(maskPtr + x)), vmax_val_32); |
|
v_int32x4 vmask1 = v_mul(v_reinterpret_as_s32(v_load_expand_q(maskPtr + x + 4)), vmax_val_32); |
|
if (x + 4 > winSize.width) |
|
{ |
|
vmask0 = v_and(vmask0, vmask_border0); |
|
} |
|
if (x + 8 > winSize.width) |
|
{ |
|
vmask1 = v_and(vmask1, vmask_border1); |
|
} |
|
|
|
v_int32x4 t0, t1; |
|
v_int16x8 v00, v01, v10, v11, t00, t01, t10, t11; |
|
v00 = v_reinterpret_as_s16(v_load_expand(src + x)); |
|
v01 = v_reinterpret_as_s16(v_load_expand(src + x + cn)); |
|
v10 = v_reinterpret_as_s16(v_load_expand(src1 + x)); |
|
v11 = v_reinterpret_as_s16(v_load_expand(src1 + x + cn)); |
|
|
|
v_zip(v00, v01, t00, t01); |
|
v_zip(v10, v11, t10, t11); |
|
t0 = v_add(v_dotprod(t00, vqw0, vdelta), v_dotprod(t10, vqw1)); |
|
t1 = v_add(v_dotprod(t01, vqw0, vdelta), v_dotprod(t11, vqw1)); |
|
t0 = v_shr(t0, W_BITS - 5); |
|
t1 = v_shr(t1, W_BITS - 5); |
|
t0 = v_and(t0, vmask0); |
|
t1 = v_and(t1, vmask1); |
|
v_store(Iptr + x, v_pack(t0, t1)); |
|
|
|
v00 = v_reinterpret_as_s16(v_load(dsrc)); |
|
v01 = v_reinterpret_as_s16(v_load(dsrc + cn2)); |
|
v10 = v_reinterpret_as_s16(v_load(dsrc1)); |
|
v11 = v_reinterpret_as_s16(v_load(dsrc1 + cn2)); |
|
|
|
v_zip(v00, v01, t00, t01); |
|
v_zip(v10, v11, t10, t11); |
|
|
|
t0 = v_add(v_dotprod(t00, vqw0, vdelta_d), v_dotprod(t10, vqw1)); |
|
t1 = v_add(v_dotprod(t01, vqw0, vdelta_d), v_dotprod(t11, vqw1)); |
|
t0 = v_shr(t0, W_BITS); |
|
t1 = v_shr(t1, W_BITS); |
|
v00 = v_pack(t0, t1); // Ix0 Iy0 Ix1 Iy1 ... |
|
v00 = v_and(v00, v_reinterpret_as_s16(vmask0)); |
|
v_store(dIptr, v00); |
|
|
|
v00 = v_reinterpret_as_s16(v_interleave_pairs(v_reinterpret_as_s32(v_interleave_pairs(v00)))); |
|
v_expand(v00, t1, t0); |
|
|
|
v_float32x4 fy = v_cvt_f32(t0); |
|
v_float32x4 fx = v_cvt_f32(t1); |
|
|
|
vA22 = v_muladd(fy, fy, vA22); |
|
vA12 = v_muladd(fx, fy, vA12); |
|
vA11 = v_muladd(fx, fx, vA11); |
|
|
|
v00 = v_reinterpret_as_s16(v_load(dsrc + 4 * 2)); |
|
v01 = v_reinterpret_as_s16(v_load(dsrc + 4 * 2 + cn2)); |
|
v10 = v_reinterpret_as_s16(v_load(dsrc1 + 4 * 2)); |
|
v11 = v_reinterpret_as_s16(v_load(dsrc1 + 4 * 2 + cn2)); |
|
|
|
v_zip(v00, v01, t00, t01); |
|
v_zip(v10, v11, t10, t11); |
|
|
|
t0 = v_add(v_dotprod(t00, vqw0, vdelta_d), v_dotprod(t10, vqw1)); |
|
t1 = v_add(v_dotprod(t01, vqw0, vdelta_d), v_dotprod(t11, vqw1)); |
|
t0 = v_shr(t0, W_BITS); |
|
t1 = v_shr(t1, W_BITS); |
|
v00 = v_pack(t0, t1); // Ix0 Iy0 Ix1 Iy1 ... |
|
v00 = v_and(v00, v_reinterpret_as_s16(vmask1)); |
|
v_store(dIptr + 4 * 2, v00); |
|
|
|
v00 = v_reinterpret_as_s16(v_interleave_pairs(v_reinterpret_as_s32(v_interleave_pairs(v00)))); |
|
v_expand(v00, t1, t0); |
|
|
|
fy = v_cvt_f32(t0); |
|
fx = v_cvt_f32(t1); |
|
|
|
vA22 = v_muladd(fy, fy, vA22); |
|
vA12 = v_muladd(fx, fy, vA12); |
|
vA11 = v_muladd(fx, fx, vA11); |
|
} |
|
#else |
|
for (int x = 0; x < winSize.width*cn; x++, dsrc += 2, dsrc1 += 2, dIptr += 2) |
|
{ |
|
if (maskPtr[x] == 0) |
|
{ |
|
dIptr[0] = 0; |
|
dIptr[1] = 0; |
|
continue; |
|
} |
|
int ival = CV_DESCALE(src[x] * iw00 + src[x + cn] * iw01 + |
|
src1[x] * iw10 + src1[x + cn] * iw11, W_BITS - 5); |
|
int ixval = CV_DESCALE(dsrc[0] * iw00 + dsrc[cn2] * iw01 + |
|
dsrc1[0] * iw10 + dsrc1[cn2] * iw11, W_BITS); |
|
int iyval = CV_DESCALE(dsrc[1] * iw00 + dsrc[cn2 + 1] * iw01 + dsrc1[1] * iw10 + |
|
dsrc1[cn2 + 1] * iw11, W_BITS); |
|
|
|
Iptr[x] = (short)ival; |
|
dIptr[0] = (short)ixval; |
|
dIptr[1] = (short)iyval; |
|
A11 += (float)(ixval*ixval); |
|
A12 += (float)(ixval*iyval); |
|
A22 += (float)(iyval*iyval); |
|
} |
|
#endif |
|
} |
|
#if CV_SIMD128 |
|
A11 += v_reduce_sum(vA11); |
|
A12 += v_reduce_sum(vA12); |
|
A22 += v_reduce_sum(vA22); |
|
#endif |
|
|
|
A11 *= FLT_SCALE; |
|
A12 *= FLT_SCALE; |
|
A22 *= FLT_SCALE; |
|
} |
|
|
|
static void getLocalPatch( |
|
const cv::Mat & src, |
|
const cv::Point2i & prevPoint, // feature points |
|
cv::Mat & winPointMask, |
|
int & noPoints, |
|
cv::Rect & winRoi, |
|
int minWinSize) |
|
{ |
|
int maxWinSizeH = (winPointMask.cols - 1) / 2; |
|
winRoi.x = prevPoint.x;// - maxWinSizeH; |
|
winRoi.y = prevPoint.y;// - maxWinSizeH; |
|
winRoi.width = winPointMask.cols; |
|
winRoi.height = winPointMask.rows; |
|
|
|
if( minWinSize == winPointMask.cols || prevPoint.x < 0 || prevPoint.y < 0 |
|
|| prevPoint.x + 2*maxWinSizeH >= src.cols || prevPoint.y + 2*maxWinSizeH >= src.rows) |
|
{ |
|
winRoi.x = prevPoint.x - maxWinSizeH; |
|
winRoi.y = prevPoint.y - maxWinSizeH; |
|
winPointMask.setTo(1); |
|
noPoints = winPointMask.size().area(); |
|
return; |
|
} |
|
winPointMask.setTo(0); |
|
noPoints = 0; |
|
int c = prevPoint.x + maxWinSizeH; |
|
int r = prevPoint.y + maxWinSizeH; |
|
int min_c = c; |
|
int max_c = c; |
|
int border_left = c - maxWinSizeH; |
|
int border_top = r - maxWinSizeH; |
|
cv::Vec4i bounds = src.at<cv::Vec4i>(r,c); |
|
int min_r = bounds.val[2]; |
|
int max_r = bounds.val[3]; |
|
|
|
for( int _r = min_r; _r <= max_r; _r++) |
|
{ |
|
cv::Rect roi(maxWinSizeH, _r - border_top, winPointMask.cols, 1); |
|
if( _r >= 0 && _r < src.cols) |
|
{ |
|
bounds = src.at<cv::Vec4i>(_r,c); |
|
roi.x = bounds.val[0] - border_left; |
|
roi.width = bounds.val[1] - bounds.val[0]; |
|
cv::Mat(winPointMask, roi).setTo(1); |
|
} |
|
else |
|
{ |
|
bounds.val[0] = border_left; |
|
bounds.val[1] = border_left + roi.width; |
|
} |
|
min_c = MIN(min_c, bounds.val[0]); |
|
max_c = MAX(max_c, bounds.val[1]); |
|
noPoints += roi.width; |
|
} |
|
|
|
if( noPoints < minWinSize * minWinSize) |
|
{ |
|
cv::Rect roi( winPointMask.cols / 2 - (minWinSize-1)/2, |
|
winPointMask.rows / 2 - (minWinSize-1)/2, |
|
minWinSize, minWinSize); |
|
cv::Mat(winPointMask, roi).setTo(1); |
|
roi.x += border_left; |
|
roi.y += border_top; |
|
min_c = MIN(MIN(min_c, roi.tl().x),roi.br().x); |
|
max_c = MAX(MAX(max_c, roi.tl().x),roi.br().x); |
|
min_r = MIN(MIN(min_r, roi.tl().y),roi.br().y); |
|
max_r = MAX(MAX(max_r, roi.tl().y),roi.br().y); |
|
noPoints += minWinSize * minWinSize; |
|
} |
|
winRoi.x = min_c - maxWinSizeH; |
|
winRoi.y = min_r - maxWinSizeH; |
|
winRoi.width = max_c - min_c; |
|
winRoi.height = max_r - min_r; |
|
winPointMask = winPointMask(cv::Rect(min_c - border_left, min_r - border_top, winRoi.width, winRoi.height)); |
|
} |
|
|
|
static inline |
|
bool calcWinMaskMat( |
|
const cv::Mat & BI, |
|
const int windowType, |
|
const cv::Point2i & iprevPt, |
|
cv::Mat & winMaskMat, |
|
cv::Size & winSize, |
|
cv::Point2f & halfWin, |
|
int & winArea, |
|
const int minWinSize, |
|
const int maxWinSize) |
|
{ |
|
if (windowType == SR_CROSS && maxWinSize != minWinSize) |
|
{ |
|
// patch generation |
|
cv::Rect winRoi; |
|
getLocalPatch(BI, iprevPt, winMaskMat, winArea, winRoi, minWinSize); |
|
if (winArea == 0) |
|
return false; |
|
winSize = winRoi.size(); |
|
halfWin = Point2f(static_cast<float>(iprevPt.x - winRoi.tl().x), |
|
static_cast<float>(iprevPt.y - winRoi.tl().y)); |
|
} |
|
else |
|
{ |
|
winSize = cv::Size(maxWinSize, maxWinSize); |
|
halfWin = Point2f((winSize.width - 1) / 2.f, (winSize.height - 1) / 2.f); |
|
winMaskMat.setTo(1); |
|
} |
|
return true; |
|
} |
|
|
|
static inline |
|
short estimateScale(cv::Mat & residuals) |
|
{ |
|
cv::Mat absMat = cv::abs(residuals); |
|
return quickselect<short>(absMat, absMat.rows / 2); |
|
} |
|
|
|
}} // namespace |
|
#endif
|
|
|