mirror of https://github.com/opencv/opencv.git
Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1242 lines
57 KiB
1242 lines
57 KiB
/*M/////////////////////////////////////////////////////////////////////////////////////// |
|
// |
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
|
// |
|
// By downloading, copying, installing or using the software you agree to this license. |
|
// If you do not agree to this license, do not download, install, |
|
// copy or use the software. |
|
// |
|
// |
|
// License Agreement |
|
// For Open Source Computer Vision Library |
|
// |
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
|
// Third party copyrights are property of their respective owners. |
|
// |
|
// Redistribution and use in source and binary forms, with or without modification, |
|
// are permitted provided that the following conditions are met: |
|
// |
|
// * Redistribution's of source code must retain the above copyright notice, |
|
// this list of conditions and the following disclaimer. |
|
// |
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
|
// this list of conditions and the following disclaimer in the documentation |
|
// and/or other materials provided with the distribution. |
|
// |
|
// * The name of the copyright holders may not be used to endorse or promote products |
|
// derived from this software without specific prior written permission. |
|
// |
|
// This software is provided by the copyright holders and contributors "as is" and |
|
// any express or implied warranties, including, but not limited to, the implied |
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
|
// indirect, incidental, special, exemplary, or consequential damages |
|
// (including, but not limited to, procurement of substitute goods or services; |
|
// loss of use, data, or profits; or business interruption) however caused |
|
// and on any theory of liability, whether in contract, strict liability, |
|
// or tort (including negligence or otherwise) arising in any way out of |
|
// the use of this software, even if advised of the possibility of such damage. |
|
// |
|
//M*/ |
|
|
|
#include "precomp.hpp" |
|
#include "opencv2/core/hal/intrin.hpp" |
|
|
|
using namespace std; |
|
|
|
namespace cv |
|
{ |
|
|
|
class VariationalRefinementImpl CV_FINAL : public VariationalRefinement |
|
{ |
|
public: |
|
VariationalRefinementImpl(); |
|
|
|
void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; |
|
void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) CV_OVERRIDE; |
|
void collectGarbage() CV_OVERRIDE; |
|
|
|
protected: //!< algorithm parameters |
|
int fixedPointIterations, sorIterations; |
|
float omega; |
|
float alpha, delta, gamma; |
|
float zeta, epsilon; |
|
|
|
public: |
|
int getFixedPointIterations() const CV_OVERRIDE { return fixedPointIterations; } |
|
void setFixedPointIterations(int val) CV_OVERRIDE { fixedPointIterations = val; } |
|
int getSorIterations() const CV_OVERRIDE { return sorIterations; } |
|
void setSorIterations(int val) CV_OVERRIDE { sorIterations = val; } |
|
float getOmega() const CV_OVERRIDE { return omega; } |
|
void setOmega(float val) CV_OVERRIDE { omega = val; } |
|
float getAlpha() const CV_OVERRIDE { return alpha; } |
|
void setAlpha(float val) CV_OVERRIDE { alpha = val; } |
|
float getDelta() const CV_OVERRIDE { return delta; } |
|
void setDelta(float val) CV_OVERRIDE { delta = val; } |
|
float getGamma() const CV_OVERRIDE { return gamma; } |
|
void setGamma(float val) CV_OVERRIDE { gamma = val; } |
|
|
|
protected: //!< internal buffers |
|
/* This struct defines a special data layout for Mat_<float>. Original buffer is split into two: one for "red" |
|
* elements (sum of indices is even) and one for "black" (sum of indices is odd) in a checkerboard pattern. It |
|
* allows for more efficient processing in SOR iterations, more natural SIMD vectorization and parallelization |
|
* (Red-Black SOR). Additionally, it simplifies border handling by adding repeated borders to both red and |
|
* black buffers. |
|
*/ |
|
struct RedBlackBuffer |
|
{ |
|
Mat_<float> red; //!< (i+j)%2==0 |
|
Mat_<float> black; //!< (i+j)%2==1 |
|
|
|
/* Width of even and odd rows may be different */ |
|
int red_even_len, red_odd_len; |
|
int black_even_len, black_odd_len; |
|
|
|
RedBlackBuffer(); |
|
void create(Size s); |
|
void release(); |
|
}; |
|
|
|
Mat_<float> Ix, Iy, Iz, Ixx, Ixy, Iyy, Ixz, Iyz; //!< image derivative buffers |
|
RedBlackBuffer Ix_rb, Iy_rb, Iz_rb, Ixx_rb, Ixy_rb, Iyy_rb, Ixz_rb, Iyz_rb; //!< corresponding red-black buffers |
|
|
|
RedBlackBuffer A11, A12, A22, b1, b2; //!< main linear system coefficients |
|
RedBlackBuffer weights; //!< smoothness term weights in the current fixed point iteration |
|
|
|
Mat_<float> mapX, mapY; //!< auxiliary buffers for remapping |
|
|
|
RedBlackBuffer tempW_u, tempW_v; //!< flow buffers that are modified in each fixed point iteration |
|
RedBlackBuffer dW_u, dW_v; //!< optical flow increment |
|
RedBlackBuffer W_u_rb, W_v_rb; //!< red-black-buffer version of the input flow |
|
|
|
private: //!< private methods and parallel sections |
|
void splitCheckerboard(RedBlackBuffer &dst, Mat &src); |
|
void mergeCheckerboard(Mat &dst, RedBlackBuffer &src); |
|
void updateRepeatedBorders(RedBlackBuffer &dst); |
|
void warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v); |
|
void prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v); |
|
|
|
/* Parallelizing arbitrary operations with 3 input/output arguments */ |
|
typedef void (VariationalRefinementImpl::*Op)(void *op1, void *op2, void *op3); |
|
struct ParallelOp_ParBody : public ParallelLoopBody |
|
{ |
|
VariationalRefinementImpl *var; |
|
vector<Op> ops; |
|
vector<void *> op1s; |
|
vector<void *> op2s; |
|
vector<void *> op3s; |
|
|
|
ParallelOp_ParBody(VariationalRefinementImpl &_var, vector<Op> _ops, vector<void *> &_op1s, |
|
vector<void *> &_op2s, vector<void *> &_op3s); |
|
void operator()(const Range &range) const CV_OVERRIDE; |
|
}; |
|
void gradHorizAndSplitOp(void *src, void *dst, void *dst_split) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE); |
|
splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); |
|
} |
|
void gradVertAndSplitOp(void *src, void *dst, void *dst_split) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE); |
|
splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); |
|
} |
|
void averageOp(void *src1, void *src2, void *dst) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F); |
|
} |
|
void subtractOp(void *src1, void *src2, void *dst) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F); |
|
} |
|
|
|
struct ComputeDataTerm_ParBody : public ParallelLoopBody |
|
{ |
|
VariationalRefinementImpl *var; |
|
int nstripes, stripe_sz; |
|
int h; |
|
RedBlackBuffer *dW_u, *dW_v; |
|
bool red_pass; |
|
|
|
ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, |
|
RedBlackBuffer &_dW_v, bool _red_pass); |
|
void operator()(const Range &range) const CV_OVERRIDE; |
|
}; |
|
|
|
struct ComputeSmoothnessTermHorPass_ParBody : public ParallelLoopBody |
|
{ |
|
VariationalRefinementImpl *var; |
|
int nstripes, stripe_sz; |
|
int h; |
|
RedBlackBuffer *W_u, *W_v, *curW_u, *curW_v; |
|
bool red_pass; |
|
|
|
ComputeSmoothnessTermHorPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, |
|
RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, RedBlackBuffer &_tempW_u, |
|
RedBlackBuffer &_tempW_v, bool _red_pass); |
|
void operator()(const Range &range) const CV_OVERRIDE; |
|
}; |
|
|
|
struct ComputeSmoothnessTermVertPass_ParBody : public ParallelLoopBody |
|
{ |
|
VariationalRefinementImpl *var; |
|
int nstripes, stripe_sz; |
|
int h; |
|
RedBlackBuffer *W_u, *W_v; |
|
bool red_pass; |
|
|
|
ComputeSmoothnessTermVertPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, |
|
RedBlackBuffer &W_u, RedBlackBuffer &_W_v, bool _red_pass); |
|
void operator()(const Range &range) const CV_OVERRIDE; |
|
}; |
|
|
|
struct RedBlackSOR_ParBody : public ParallelLoopBody |
|
{ |
|
VariationalRefinementImpl *var; |
|
int nstripes, stripe_sz; |
|
int h; |
|
RedBlackBuffer *dW_u, *dW_v; |
|
bool red_pass; |
|
|
|
RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, |
|
RedBlackBuffer &_dW_v, bool _red_pass); |
|
void operator()(const Range &range) const CV_OVERRIDE; |
|
}; |
|
}; |
|
|
|
VariationalRefinementImpl::VariationalRefinementImpl() |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
fixedPointIterations = 5; |
|
sorIterations = 5; |
|
alpha = 20.0f; |
|
delta = 5.0f; |
|
gamma = 10.0f; |
|
omega = 1.6f; |
|
zeta = 0.1f; |
|
epsilon = 0.001f; |
|
} |
|
|
|
/* This function converts an input Mat into the RedBlackBuffer format, which involves |
|
* splitting the input buffer into two and adding repeated borders. Assumes that enough |
|
* memory in dst is already allocated. |
|
*/ |
|
void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int buf_j, j; |
|
int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders |
|
|
|
/* Rows of red and black buffers can have different actual width, some extra repeated values are |
|
* added for padding in such cases. |
|
*/ |
|
for (int i = 0; i < src.rows; i++) |
|
{ |
|
float *src_buf = src.ptr<float>(i); |
|
float *r_buf = dst.red.ptr<float>(i + 1); |
|
float *b_buf = dst.black.ptr<float>(i + 1); |
|
r_buf[0] = b_buf[0] = src_buf[0]; |
|
buf_j = 1; |
|
if (i % 2 == 0) |
|
{ |
|
for (j = 0; j < src.cols - 1; j += 2) |
|
{ |
|
r_buf[buf_j] = src_buf[j]; |
|
b_buf[buf_j] = src_buf[j + 1]; |
|
buf_j++; |
|
} |
|
if (j < src.cols) |
|
r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; |
|
else |
|
j--; |
|
} |
|
else |
|
{ |
|
for (j = 0; j < src.cols - 1; j += 2) |
|
{ |
|
b_buf[buf_j] = src_buf[j]; |
|
r_buf[buf_j] = src_buf[j + 1]; |
|
buf_j++; |
|
} |
|
if (j < src.cols) |
|
r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; |
|
else |
|
j--; |
|
} |
|
r_buf[buf_w - 1] = b_buf[buf_w - 1] = src_buf[j]; |
|
} |
|
|
|
/* Fill top and bottom borders: */ |
|
{ |
|
float *r_buf_border = dst.red.ptr<float>(dst.red.rows - 1); |
|
float *b_buf_border = dst.black.ptr<float>(dst.black.rows - 1); |
|
float *r_buf = dst.red.ptr<float>(dst.red.rows - 2); |
|
float *b_buf = dst.black.ptr<float>(dst.black.rows - 2); |
|
memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); |
|
memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); |
|
} |
|
{ |
|
float *r_buf_border = dst.red.ptr<float>(0); |
|
float *b_buf_border = dst.black.ptr<float>(0); |
|
float *r_buf = dst.red.ptr<float>(1); |
|
float *b_buf = dst.black.ptr<float>(1); |
|
memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); |
|
memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); |
|
} |
|
} |
|
|
|
/* The inverse of splitCheckerboard, i.e. converting the RedBlackBuffer back into Mat. |
|
* Assumes that enough memory in dst is already allocated. |
|
*/ |
|
void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int buf_j, j; |
|
for (int i = 0; i < dst.rows; i++) |
|
{ |
|
float *src_r_buf = src.red.ptr<float>(i + 1); |
|
float *src_b_buf = src.black.ptr<float>(i + 1); |
|
float *dst_buf = dst.ptr<float>(i); |
|
buf_j = 1; |
|
|
|
if (i % 2 == 0) |
|
{ |
|
for (j = 0; j < dst.cols - 1; j += 2) |
|
{ |
|
dst_buf[j] = src_r_buf[buf_j]; |
|
dst_buf[j + 1] = src_b_buf[buf_j]; |
|
buf_j++; |
|
} |
|
if (j < dst.cols) |
|
dst_buf[j] = src_r_buf[buf_j]; |
|
} |
|
else |
|
{ |
|
for (j = 0; j < dst.cols - 1; j += 2) |
|
{ |
|
dst_buf[j] = src_b_buf[buf_j]; |
|
dst_buf[j + 1] = src_r_buf[buf_j]; |
|
buf_j++; |
|
} |
|
if (j < dst.cols) |
|
dst_buf[j] = src_b_buf[buf_j]; |
|
} |
|
} |
|
} |
|
|
|
/* An auxiliary function that updates the borders. Used to enforce that border values repeat |
|
* the ones adjacent to the border. |
|
*/ |
|
void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int buf_w = dst.red.cols; |
|
for (int i = 0; i < dst.red.rows - 2; i++) |
|
{ |
|
float *r_buf = dst.red.ptr<float>(i + 1); |
|
float *b_buf = dst.black.ptr<float>(i + 1); |
|
|
|
if (i % 2 == 0) |
|
{ |
|
b_buf[0] = r_buf[1]; |
|
if (dst.red_even_len > dst.black_even_len) |
|
b_buf[dst.black_even_len + 1] = r_buf[dst.red_even_len]; |
|
else |
|
r_buf[dst.red_even_len + 1] = b_buf[dst.black_even_len]; |
|
} |
|
else |
|
{ |
|
r_buf[0] = b_buf[1]; |
|
if (dst.red_odd_len < dst.black_odd_len) |
|
r_buf[dst.red_odd_len + 1] = b_buf[dst.black_odd_len]; |
|
else |
|
b_buf[dst.black_odd_len + 1] = r_buf[dst.red_odd_len]; |
|
} |
|
} |
|
{ |
|
float *r_buf_border = dst.red.ptr<float>(dst.red.rows - 1); |
|
float *b_buf_border = dst.black.ptr<float>(dst.black.rows - 1); |
|
float *r_buf = dst.red.ptr<float>(dst.red.rows - 2); |
|
float *b_buf = dst.black.ptr<float>(dst.black.rows - 2); |
|
memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); |
|
memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); |
|
} |
|
{ |
|
float *r_buf_border = dst.red.ptr<float>(0); |
|
float *b_buf_border = dst.black.ptr<float>(0); |
|
float *r_buf = dst.red.ptr<float>(1); |
|
float *b_buf = dst.black.ptr<float>(1); |
|
memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); |
|
memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); |
|
} |
|
} |
|
|
|
VariationalRefinementImpl::RedBlackBuffer::RedBlackBuffer() |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
release(); |
|
} |
|
void VariationalRefinementImpl::RedBlackBuffer::create(Size s) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
/* Allocate enough memory to include borders */ |
|
int w = (int)ceil(s.width / 2.0) + 2; |
|
red.create(s.height + 2, w); |
|
black.create(s.height + 2, w); |
|
|
|
if (s.width % 2 == 0) |
|
red_even_len = red_odd_len = black_even_len = black_odd_len = w - 2; |
|
else |
|
{ |
|
red_even_len = black_odd_len = w - 2; |
|
red_odd_len = black_even_len = w - 3; |
|
} |
|
} |
|
|
|
void VariationalRefinementImpl::RedBlackBuffer::release() |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
red.release(); |
|
black.release(); |
|
red_even_len = red_odd_len = black_even_len = black_odd_len = 0; |
|
} |
|
|
|
VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRefinementImpl &_var, vector<Op> _ops, |
|
vector<void *> &_op1s, vector<void *> &_op2s, |
|
vector<void *> &_op3s) |
|
: var(&_var), ops(_ops), op1s(_op1s), op2s(_op2s), op3s(_op3s) |
|
{ |
|
} |
|
|
|
void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
for (int i = range.start; i < range.end; i++) |
|
(var->*ops[i])(op1s[i], op2s[i], op3s[i]); |
|
} |
|
|
|
void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
for (int i = 0; i < flow_u.rows; i++) |
|
{ |
|
float *pFlowU = flow_u.ptr<float>(i); |
|
float *pFlowV = flow_v.ptr<float>(i); |
|
float *pMapX = mapX.ptr<float>(i); |
|
float *pMapY = mapY.ptr<float>(i); |
|
for (int j = 0; j < flow_u.cols; j++) |
|
{ |
|
pMapX[j] = j + pFlowU[j]; |
|
pMapY[j] = i + pFlowV[j]; |
|
} |
|
} |
|
remap(src, dst, mapX, mapY, INTER_LINEAR, BORDER_REPLICATE); |
|
} |
|
|
|
void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
Size s = I0.size(); |
|
A11.create(s); |
|
A12.create(s); |
|
A22.create(s); |
|
b1.create(s); |
|
b2.create(s); |
|
weights.create(s); |
|
weights.red.setTo(0.0f); |
|
weights.black.setTo(0.0f); |
|
tempW_u.create(s); |
|
tempW_v.create(s); |
|
dW_u.create(s); |
|
dW_v.create(s); |
|
W_u_rb.create(s); |
|
W_v_rb.create(s); |
|
|
|
Ix.create(s); |
|
Iy.create(s); |
|
Iz.create(s); |
|
Ixx.create(s); |
|
Ixy.create(s); |
|
Iyy.create(s); |
|
Ixz.create(s); |
|
Iyz.create(s); |
|
|
|
Ix_rb.create(s); |
|
Iy_rb.create(s); |
|
Iz_rb.create(s); |
|
Ixx_rb.create(s); |
|
Ixy_rb.create(s); |
|
Iyy_rb.create(s); |
|
Ixz_rb.create(s); |
|
Iyz_rb.create(s); |
|
|
|
mapX.create(s); |
|
mapY.create(s); |
|
|
|
/* Floating point warps work significantly better than fixed-point */ |
|
Mat I1flt, warpedI; |
|
I1.convertTo(I1flt, CV_32F); |
|
warpImage(warpedI, I1flt, W_u, W_v); |
|
|
|
/* Computing an average of the current and warped next frames (to compute the derivatives on) and |
|
* temporal derivative Iz |
|
*/ |
|
Mat averagedI; |
|
{ |
|
vector<void *> op1s; |
|
op1s.push_back((void *)&I0); |
|
op1s.push_back((void *)&warpedI); |
|
vector<void *> op2s; |
|
op2s.push_back((void *)&warpedI); |
|
op2s.push_back((void *)&I0); |
|
vector<void *> op3s; |
|
op3s.push_back((void *)&averagedI); |
|
op3s.push_back((void *)&Iz); |
|
vector<Op> ops; |
|
ops.push_back(&VariationalRefinementImpl::averageOp); |
|
ops.push_back(&VariationalRefinementImpl::subtractOp); |
|
parallel_for_(Range(0, 2), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); |
|
} |
|
splitCheckerboard(Iz_rb, Iz); |
|
|
|
/* Computing first-order derivatives */ |
|
{ |
|
vector<void *> op1s; |
|
op1s.push_back((void *)&averagedI); |
|
op1s.push_back((void *)&averagedI); |
|
op1s.push_back((void *)&Iz); |
|
op1s.push_back((void *)&Iz); |
|
vector<void *> op2s; |
|
op2s.push_back((void *)&Ix); |
|
op2s.push_back((void *)&Iy); |
|
op2s.push_back((void *)&Ixz); |
|
op2s.push_back((void *)&Iyz); |
|
vector<void *> op3s; |
|
op3s.push_back((void *)&Ix_rb); |
|
op3s.push_back((void *)&Iy_rb); |
|
op3s.push_back((void *)&Ixz_rb); |
|
op3s.push_back((void *)&Iyz_rb); |
|
vector<Op> ops; |
|
ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); |
|
ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); |
|
ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); |
|
ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); |
|
parallel_for_(Range(0, 4), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); |
|
} |
|
|
|
/* Computing second-order derivatives */ |
|
{ |
|
vector<void *> op1s; |
|
op1s.push_back((void *)&Ix); |
|
op1s.push_back((void *)&Ix); |
|
op1s.push_back((void *)&Iy); |
|
vector<void *> op2s; |
|
op2s.push_back((void *)&Ixx); |
|
op2s.push_back((void *)&Ixy); |
|
op2s.push_back((void *)&Iyy); |
|
vector<void *> op3s; |
|
op3s.push_back((void *)&Ixx_rb); |
|
op3s.push_back((void *)&Ixy_rb); |
|
op3s.push_back((void *)&Iyy_rb); |
|
vector<Op> ops; |
|
ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); |
|
ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); |
|
ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); |
|
parallel_for_(Range(0, 3), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); |
|
} |
|
} |
|
|
|
VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, |
|
int _nstripes, int _h, |
|
RedBlackBuffer &_dW_u, |
|
RedBlackBuffer &_dW_v, bool _red_pass) |
|
: var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) |
|
{ |
|
stripe_sz = (int)ceil(h / (double)nstripes); |
|
} |
|
|
|
/* This function computes parts of the main linear system coefficients A11,A12,A22,b1,b1 |
|
* that correspond to the data term, which includes color and gradient constancy assumptions. |
|
*/ |
|
void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int start_i = min(range.start * stripe_sz, h); |
|
int end_i = min(range.end * stripe_sz, h); |
|
|
|
float zeta_squared = var->zeta * var->zeta; |
|
float epsilon_squared = var->epsilon * var->epsilon; |
|
float gamma2 = var->gamma / 2; |
|
float delta2 = var->delta / 2; |
|
|
|
float *pIx, *pIy, *pIz; |
|
float *pIxx, *pIxy, *pIyy, *pIxz, *pIyz; |
|
float *pdU, *pdV; |
|
float *pa11, *pa12, *pa22, *pb1, *pb2; |
|
|
|
float derivNorm, derivNorm2; |
|
float Ik1z, Ik1zx, Ik1zy; |
|
float weight; |
|
int len; |
|
for (int i = start_i; i < end_i; i++) |
|
{ |
|
#define INIT_ROW_POINTERS(color) \ |
|
pIx = var->Ix_rb.color.ptr<float>(i + 1) + 1; \ |
|
pIy = var->Iy_rb.color.ptr<float>(i + 1) + 1; \ |
|
pIz = var->Iz_rb.color.ptr<float>(i + 1) + 1; \ |
|
pIxx = var->Ixx_rb.color.ptr<float>(i + 1) + 1; \ |
|
pIxy = var->Ixy_rb.color.ptr<float>(i + 1) + 1; \ |
|
pIyy = var->Iyy_rb.color.ptr<float>(i + 1) + 1; \ |
|
pIxz = var->Ixz_rb.color.ptr<float>(i + 1) + 1; \ |
|
pIyz = var->Iyz_rb.color.ptr<float>(i + 1) + 1; \ |
|
pa11 = var->A11.color.ptr<float>(i + 1) + 1; \ |
|
pa12 = var->A12.color.ptr<float>(i + 1) + 1; \ |
|
pa22 = var->A22.color.ptr<float>(i + 1) + 1; \ |
|
pb1 = var->b1.color.ptr<float>(i + 1) + 1; \ |
|
pb2 = var->b2.color.ptr<float>(i + 1) + 1; \ |
|
pdU = dW_u->color.ptr<float>(i + 1) + 1; \ |
|
pdV = dW_v->color.ptr<float>(i + 1) + 1; \ |
|
if (i % 2 == 0) \ |
|
len = var->Ix_rb.color##_even_len; \ |
|
else \ |
|
len = var->Ix_rb.color##_odd_len; |
|
|
|
if (red_pass) |
|
{ |
|
INIT_ROW_POINTERS(red); |
|
} |
|
else |
|
{ |
|
INIT_ROW_POINTERS(black); |
|
} |
|
#undef INIT_ROW_POINTERS |
|
|
|
int j = 0; |
|
#if CV_SIMD128 |
|
v_float32x4 zeta_vec = v_setall_f32(zeta_squared); |
|
v_float32x4 eps_vec = v_setall_f32(epsilon_squared); |
|
v_float32x4 delta_vec = v_setall_f32(delta2); |
|
v_float32x4 gamma_vec = v_setall_f32(gamma2); |
|
v_float32x4 zero_vec = v_setall_f32(0.0f); |
|
v_float32x4 pIx_vec, pIy_vec, pIz_vec, pdU_vec, pdV_vec; |
|
v_float32x4 pIxx_vec, pIxy_vec, pIyy_vec, pIxz_vec, pIyz_vec; |
|
v_float32x4 derivNorm_vec, derivNorm2_vec, weight_vec; |
|
v_float32x4 Ik1z_vec, Ik1zx_vec, Ik1zy_vec; |
|
v_float32x4 pa11_vec, pa12_vec, pa22_vec, pb1_vec, pb2_vec; |
|
|
|
for (; j < len - 3; j += 4) |
|
{ |
|
pIx_vec = v_load(pIx + j); |
|
pIy_vec = v_load(pIy + j); |
|
pIz_vec = v_load(pIz + j); |
|
pdU_vec = v_load(pdU + j); |
|
pdV_vec = v_load(pdV + j); |
|
|
|
derivNorm_vec = pIx_vec * pIx_vec + pIy_vec * pIy_vec + zeta_vec; |
|
Ik1z_vec = pIz_vec + pIx_vec * pdU_vec + pIy_vec * pdV_vec; |
|
weight_vec = (delta_vec / v_sqrt(Ik1z_vec * Ik1z_vec / derivNorm_vec + eps_vec)) / derivNorm_vec; |
|
|
|
pa11_vec = weight_vec * (pIx_vec * pIx_vec) + zeta_vec; |
|
pa12_vec = weight_vec * (pIx_vec * pIy_vec); |
|
pa22_vec = weight_vec * (pIy_vec * pIy_vec) + zeta_vec; |
|
pb1_vec = zero_vec - weight_vec * (pIz_vec * pIx_vec); |
|
pb2_vec = zero_vec - weight_vec * (pIz_vec * pIy_vec); |
|
|
|
pIxx_vec = v_load(pIxx + j); |
|
pIxy_vec = v_load(pIxy + j); |
|
pIyy_vec = v_load(pIyy + j); |
|
pIxz_vec = v_load(pIxz + j); |
|
pIyz_vec = v_load(pIyz + j); |
|
|
|
derivNorm_vec = pIxx_vec * pIxx_vec + pIxy_vec * pIxy_vec + zeta_vec; |
|
derivNorm2_vec = pIyy_vec * pIyy_vec + pIxy_vec * pIxy_vec + zeta_vec; |
|
Ik1zx_vec = pIxz_vec + pIxx_vec * pdU_vec + pIxy_vec * pdV_vec; |
|
Ik1zy_vec = pIyz_vec + pIxy_vec * pdU_vec + pIyy_vec * pdV_vec; |
|
weight_vec = gamma_vec / v_sqrt(Ik1zx_vec * Ik1zx_vec / derivNorm_vec + |
|
Ik1zy_vec * Ik1zy_vec / derivNorm2_vec + eps_vec); |
|
|
|
pa11_vec += weight_vec * (pIxx_vec * pIxx_vec / derivNorm_vec + pIxy_vec * pIxy_vec / derivNorm2_vec); |
|
pa12_vec += weight_vec * (pIxx_vec * pIxy_vec / derivNorm_vec + pIxy_vec * pIyy_vec / derivNorm2_vec); |
|
pa22_vec += weight_vec * (pIxy_vec * pIxy_vec / derivNorm_vec + pIyy_vec * pIyy_vec / derivNorm2_vec); |
|
pb1_vec -= weight_vec * (pIxx_vec * pIxz_vec / derivNorm_vec + pIxy_vec * pIyz_vec / derivNorm2_vec); |
|
pb2_vec -= weight_vec * (pIxy_vec * pIxz_vec / derivNorm_vec + pIyy_vec * pIyz_vec / derivNorm2_vec); |
|
|
|
v_store(pa11 + j, pa11_vec); |
|
v_store(pa12 + j, pa12_vec); |
|
v_store(pa22 + j, pa22_vec); |
|
v_store(pb1 + j, pb1_vec); |
|
v_store(pb2 + j, pb2_vec); |
|
} |
|
#endif |
|
for (; j < len; j++) |
|
{ |
|
/* Step 1: Compute color constancy terms */ |
|
/* Normalization factor:*/ |
|
derivNorm = pIx[j] * pIx[j] + pIy[j] * pIy[j] + zeta_squared; |
|
/* Color constancy penalty (computed by Taylor expansion):*/ |
|
Ik1z = pIz[j] + pIx[j] * pdU[j] + pIy[j] * pdV[j]; |
|
/* Weight of the color constancy term in the current fixed-point iteration divided by derivNorm: */ |
|
weight = (delta2 / sqrt(Ik1z * Ik1z / derivNorm + epsilon_squared)) / derivNorm; |
|
/* Add respective color constancy terms to the linear system coefficients: */ |
|
pa11[j] = weight * (pIx[j] * pIx[j]) + zeta_squared; |
|
pa12[j] = weight * (pIx[j] * pIy[j]); |
|
pa22[j] = weight * (pIy[j] * pIy[j]) + zeta_squared; |
|
pb1[j] = -weight * (pIz[j] * pIx[j]); |
|
pb2[j] = -weight * (pIz[j] * pIy[j]); |
|
|
|
/* Step 2: Compute gradient constancy terms */ |
|
/* Normalization factor for x gradient: */ |
|
derivNorm = pIxx[j] * pIxx[j] + pIxy[j] * pIxy[j] + zeta_squared; |
|
/* Normalization factor for y gradient: */ |
|
derivNorm2 = pIyy[j] * pIyy[j] + pIxy[j] * pIxy[j] + zeta_squared; |
|
/* Gradient constancy penalties (computed by Taylor expansion): */ |
|
Ik1zx = pIxz[j] + pIxx[j] * pdU[j] + pIxy[j] * pdV[j]; |
|
Ik1zy = pIyz[j] + pIxy[j] * pdU[j] + pIyy[j] * pdV[j]; |
|
/* Weight of the gradient constancy term in the current fixed-point iteration: */ |
|
weight = gamma2 / sqrt(Ik1zx * Ik1zx / derivNorm + Ik1zy * Ik1zy / derivNorm2 + epsilon_squared); |
|
/* Add respective gradient constancy components to the linear system coefficients: */ |
|
pa11[j] += weight * (pIxx[j] * pIxx[j] / derivNorm + pIxy[j] * pIxy[j] / derivNorm2); |
|
pa12[j] += weight * (pIxx[j] * pIxy[j] / derivNorm + pIxy[j] * pIyy[j] / derivNorm2); |
|
pa22[j] += weight * (pIxy[j] * pIxy[j] / derivNorm + pIyy[j] * pIyy[j] / derivNorm2); |
|
pb1[j] += -weight * (pIxx[j] * pIxz[j] / derivNorm + pIxy[j] * pIyz[j] / derivNorm2); |
|
pb2[j] += -weight * (pIxy[j] * pIxz[j] / derivNorm + pIyy[j] * pIyz[j] / derivNorm2); |
|
} |
|
} |
|
} |
|
|
|
VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothnessTermHorPass_ParBody( |
|
VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, |
|
RedBlackBuffer &_tempW_u, RedBlackBuffer &_tempW_v, bool _red_pass) |
|
: var(&_var), nstripes(_nstripes), h(_h), W_u(&_W_u), W_v(&_W_v), curW_u(&_tempW_u), curW_v(&_tempW_v), |
|
red_pass(_red_pass) |
|
{ |
|
stripe_sz = (int)ceil(h / (double)nstripes); |
|
} |
|
|
|
/* This function updates the linear system coefficients A11,A22,b1,b1 according to the |
|
* flow smoothness term and computes corresponding weights for the current fixed point iteration. |
|
* A11,A22,b1,b1 are updated only partially (horizontal pass). Doing both horizontal and vertical |
|
* passes in one loop complicates parallelization (different threads write to the same elements). |
|
*/ |
|
void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int start_i = min(range.start * stripe_sz, h); |
|
int end_i = min(range.end * stripe_sz, h); |
|
|
|
float epsilon_squared = var->epsilon * var->epsilon; |
|
float alpha2 = var->alpha / 2; |
|
float *pWeight; |
|
float *pA_u, *pA_u_next, *pA_v, *pA_v_next; |
|
float *pB_u, *pB_u_next, *pB_v, *pB_v_next; |
|
float *cW_u, *cW_u_next, *cW_u_next_row; |
|
float *cW_v, *cW_v_next, *cW_v_next_row; |
|
float *pW_u, *pW_u_next; |
|
float *pW_v, *pW_v_next; |
|
float ux, uy, vx, vy; |
|
int len; |
|
bool touches_right_border = true; |
|
|
|
#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd, bool_default) \ |
|
pWeight = var->weights.cur_color.ptr<float>(i + 1) + 1; \ |
|
pA_u = var->A11.cur_color.ptr<float>(i + 1) + 1; \ |
|
pB_u = var->b1.cur_color.ptr<float>(i + 1) + 1; \ |
|
cW_u = curW_u->cur_color.ptr<float>(i + 1) + 1; \ |
|
pW_u = W_u->cur_color.ptr<float>(i + 1) + 1; \ |
|
pA_v = var->A22.cur_color.ptr<float>(i + 1) + 1; \ |
|
pB_v = var->b2.cur_color.ptr<float>(i + 1) + 1; \ |
|
cW_v = curW_v->cur_color.ptr<float>(i + 1) + 1; \ |
|
pW_v = W_v->cur_color.ptr<float>(i + 1) + 1; \ |
|
\ |
|
cW_u_next_row = curW_u->next_color.ptr<float>(i + 2) + 1; \ |
|
cW_v_next_row = curW_v->next_color.ptr<float>(i + 2) + 1; \ |
|
\ |
|
if (i % 2 == 0) \ |
|
{ \ |
|
pA_u_next = var->A11.next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
pB_u_next = var->b1.next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
cW_u_next = curW_u->next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
pW_u_next = W_u->next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
pA_v_next = var->A22.next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
pB_v_next = var->b2.next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
cW_v_next = curW_v->next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
pW_v_next = W_v->next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
len = var->A11.cur_color##_even_len; \ |
|
if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ |
|
touches_right_border = bool_default; \ |
|
else \ |
|
touches_right_border = !bool_default; \ |
|
} \ |
|
else \ |
|
{ \ |
|
pA_u_next = var->A11.next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
pB_u_next = var->b1.next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
cW_u_next = curW_u->next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
pW_u_next = W_u->next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
pA_v_next = var->A22.next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
pB_v_next = var->b2.next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
cW_v_next = curW_v->next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
pW_v_next = W_v->next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
len = var->A11.cur_color##_odd_len; \ |
|
if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ |
|
touches_right_border = !bool_default; \ |
|
else \ |
|
touches_right_border = bool_default; \ |
|
} |
|
|
|
for (int i = start_i; i < end_i; i++) |
|
{ |
|
if (red_pass) |
|
{ |
|
INIT_ROW_POINTERS(red, black, 1, 2, true); |
|
} |
|
else |
|
{ |
|
INIT_ROW_POINTERS(black, red, 2, 1, false); |
|
} |
|
#undef INIT_ROW_POINTERS |
|
|
|
#define COMPUTE \ |
|
/* Gradients for the flow on the current fixed-point iteration: */ \ |
|
ux = cW_u_next[j] - cW_u[j]; \ |
|
vx = cW_v_next[j] - cW_v[j]; \ |
|
uy = cW_u_next_row[j] - cW_u[j]; \ |
|
vy = cW_v_next_row[j] - cW_v[j]; \ |
|
/* Weight of the smoothness term in the current fixed-point iteration: */ \ |
|
pWeight[j] = alpha2 / sqrt(ux * ux + vx * vx + uy * uy + vy * vy + epsilon_squared); \ |
|
/* Gradients for initial raw flow multiplied by weight:*/ \ |
|
ux = pWeight[j] * (pW_u_next[j] - pW_u[j]); \ |
|
vx = pWeight[j] * (pW_v_next[j] - pW_v[j]); |
|
|
|
#define UPDATE \ |
|
pB_u[j] += ux; \ |
|
pA_u[j] += pWeight[j]; \ |
|
pB_v[j] += vx; \ |
|
pA_v[j] += pWeight[j]; \ |
|
pB_u_next[j] -= ux; \ |
|
pA_u_next[j] += pWeight[j]; \ |
|
pB_v_next[j] -= vx; \ |
|
pA_v_next[j] += pWeight[j]; |
|
|
|
int j = 0; |
|
#if CV_SIMD128 |
|
v_float32x4 alpha2_vec = v_setall_f32(alpha2); |
|
v_float32x4 eps_vec = v_setall_f32(epsilon_squared); |
|
v_float32x4 cW_u_vec, cW_v_vec; |
|
v_float32x4 pWeight_vec, ux_vec, vx_vec, uy_vec, vy_vec; |
|
|
|
for (; j < len - 4; j += 4) |
|
{ |
|
cW_u_vec = v_load(cW_u + j); |
|
cW_v_vec = v_load(cW_v + j); |
|
|
|
ux_vec = v_load(cW_u_next + j) - cW_u_vec; |
|
vx_vec = v_load(cW_v_next + j) - cW_v_vec; |
|
uy_vec = v_load(cW_u_next_row + j) - cW_u_vec; |
|
vy_vec = v_load(cW_v_next_row + j) - cW_v_vec; |
|
pWeight_vec = |
|
alpha2_vec / v_sqrt(ux_vec * ux_vec + vx_vec * vx_vec + uy_vec * uy_vec + vy_vec * vy_vec + eps_vec); |
|
v_store(pWeight + j, pWeight_vec); |
|
|
|
ux_vec = pWeight_vec * (v_load(pW_u_next + j) - v_load(pW_u + j)); |
|
vx_vec = pWeight_vec * (v_load(pW_v_next + j) - v_load(pW_v + j)); |
|
|
|
v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); |
|
v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); |
|
v_store(pB_u + j, v_load(pB_u + j) + ux_vec); |
|
v_store(pB_v + j, v_load(pB_v + j) + vx_vec); |
|
|
|
v_store(pA_u_next + j, v_load(pA_u_next + j) + pWeight_vec); |
|
v_store(pA_v_next + j, v_load(pA_v_next + j) + pWeight_vec); |
|
v_store(pB_u_next + j, v_load(pB_u_next + j) - ux_vec); |
|
v_store(pB_v_next + j, v_load(pB_v_next + j) - vx_vec); |
|
} |
|
#endif |
|
for (; j < len - 1; j++) |
|
{ |
|
COMPUTE; |
|
UPDATE; |
|
} |
|
|
|
/* Omit the update on the rightmost elements */ |
|
if (touches_right_border) |
|
{ |
|
COMPUTE; |
|
} |
|
else |
|
{ |
|
COMPUTE; |
|
UPDATE; |
|
} |
|
} |
|
#undef COMPUTE |
|
#undef UPDATE |
|
} |
|
|
|
VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothnessTermVertPass_ParBody( |
|
VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, bool _red_pass) |
|
: var(&_var), nstripes(_nstripes), W_u(&_W_u), W_v(&_W_v), red_pass(_red_pass) |
|
{ |
|
/* Omit the last row in the vertical pass */ |
|
h = _h - 1; |
|
stripe_sz = (int)ceil(h / (double)nstripes); |
|
} |
|
|
|
/* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b1. */ |
|
void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int start_i = min(range.start * stripe_sz, h); |
|
int end_i = min(range.end * stripe_sz, h); |
|
|
|
float *pWeight; |
|
float *pA_u, *pA_u_next_row, *pA_v, *pA_v_next_row; |
|
float *pB_u, *pB_u_next_row, *pB_v, *pB_v_next_row; |
|
float *pW_u, *pW_u_next_row, *pW_v, *pW_v_next_row; |
|
float vy, uy; |
|
int len; |
|
|
|
for (int i = start_i; i < end_i; i++) |
|
{ |
|
#define INIT_ROW_POINTERS(cur_color, next_color) \ |
|
pWeight = var->weights.cur_color.ptr<float>(i + 1) + 1; \ |
|
pA_u = var->A11.cur_color.ptr<float>(i + 1) + 1; \ |
|
pB_u = var->b1.cur_color.ptr<float>(i + 1) + 1; \ |
|
pW_u = W_u->cur_color.ptr<float>(i + 1) + 1; \ |
|
pA_v = var->A22.cur_color.ptr<float>(i + 1) + 1; \ |
|
pB_v = var->b2.cur_color.ptr<float>(i + 1) + 1; \ |
|
pW_v = W_v->cur_color.ptr<float>(i + 1) + 1; \ |
|
\ |
|
pA_u_next_row = var->A11.next_color.ptr<float>(i + 2) + 1; \ |
|
pB_u_next_row = var->b1.next_color.ptr<float>(i + 2) + 1; \ |
|
pW_u_next_row = W_u->next_color.ptr<float>(i + 2) + 1; \ |
|
pA_v_next_row = var->A22.next_color.ptr<float>(i + 2) + 1; \ |
|
pB_v_next_row = var->b2.next_color.ptr<float>(i + 2) + 1; \ |
|
pW_v_next_row = W_v->next_color.ptr<float>(i + 2) + 1; \ |
|
\ |
|
if (i % 2 == 0) \ |
|
len = var->A11.cur_color##_even_len; \ |
|
else \ |
|
len = var->A11.cur_color##_odd_len; |
|
|
|
if (red_pass) |
|
{ |
|
INIT_ROW_POINTERS(red, black); |
|
} |
|
else |
|
{ |
|
INIT_ROW_POINTERS(black, red); |
|
} |
|
#undef INIT_ROW_POINTERS |
|
|
|
int j = 0; |
|
#if CV_SIMD128 |
|
v_float32x4 pWeight_vec, uy_vec, vy_vec; |
|
for (; j < len - 3; j += 4) |
|
{ |
|
pWeight_vec = v_load(pWeight + j); |
|
uy_vec = pWeight_vec * (v_load(pW_u_next_row + j) - v_load(pW_u + j)); |
|
vy_vec = pWeight_vec * (v_load(pW_v_next_row + j) - v_load(pW_v + j)); |
|
|
|
v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); |
|
v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); |
|
v_store(pB_u + j, v_load(pB_u + j) + uy_vec); |
|
v_store(pB_v + j, v_load(pB_v + j) + vy_vec); |
|
|
|
v_store(pA_u_next_row + j, v_load(pA_u_next_row + j) + pWeight_vec); |
|
v_store(pA_v_next_row + j, v_load(pA_v_next_row + j) + pWeight_vec); |
|
v_store(pB_u_next_row + j, v_load(pB_u_next_row + j) - uy_vec); |
|
v_store(pB_v_next_row + j, v_load(pB_v_next_row + j) - vy_vec); |
|
} |
|
#endif |
|
for (; j < len; j++) |
|
{ |
|
uy = pWeight[j] * (pW_u_next_row[j] - pW_u[j]); |
|
vy = pWeight[j] * (pW_v_next_row[j] - pW_v[j]); |
|
pB_u[j] += uy; |
|
pA_u[j] += pWeight[j]; |
|
pB_v[j] += vy; |
|
pA_v[j] += pWeight[j]; |
|
pB_u_next_row[j] -= uy; |
|
pA_u_next_row[j] += pWeight[j]; |
|
pB_v_next_row[j] -= vy; |
|
pA_v_next_row[j] += pWeight[j]; |
|
} |
|
} |
|
} |
|
|
|
VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, |
|
int _h, RedBlackBuffer &_dW_u, |
|
RedBlackBuffer &_dW_v, bool _red_pass) |
|
: var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) |
|
{ |
|
stripe_sz = (int)ceil(h / (double)nstripes); |
|
} |
|
|
|
/* This function implements the Red-Black SOR (successive-over relaxation) method for solving the main |
|
* linear system in the current fixed-point iteration. |
|
*/ |
|
void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int start = min(range.start * stripe_sz, h); |
|
int end = min(range.end * stripe_sz, h); |
|
|
|
float *pa11, *pa12, *pa22, *pb1, *pb2, *pW, *pdu, *pdv; |
|
float *pW_next, *pdu_next, *pdv_next; |
|
float *pW_prev_row, *pdu_prev_row, *pdv_prev_row; |
|
float *pdu_next_row, *pdv_next_row; |
|
|
|
float sigmaU, sigmaV; |
|
int j, len; |
|
for (int i = start; i < end; i++) |
|
{ |
|
#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd) \ |
|
pW = var->weights.cur_color.ptr<float>(i + 1) + 1; \ |
|
pa11 = var->A11.cur_color.ptr<float>(i + 1) + 1; \ |
|
pa12 = var->A12.cur_color.ptr<float>(i + 1) + 1; \ |
|
pa22 = var->A22.cur_color.ptr<float>(i + 1) + 1; \ |
|
pb1 = var->b1.cur_color.ptr<float>(i + 1) + 1; \ |
|
pb2 = var->b2.cur_color.ptr<float>(i + 1) + 1; \ |
|
pdu = dW_u->cur_color.ptr<float>(i + 1) + 1; \ |
|
pdv = dW_v->cur_color.ptr<float>(i + 1) + 1; \ |
|
\ |
|
pdu_next_row = dW_u->next_color.ptr<float>(i + 2) + 1; \ |
|
pdv_next_row = dW_v->next_color.ptr<float>(i + 2) + 1; \ |
|
\ |
|
pW_prev_row = var->weights.next_color.ptr<float>(i) + 1; \ |
|
pdu_prev_row = dW_u->next_color.ptr<float>(i) + 1; \ |
|
pdv_prev_row = dW_v->next_color.ptr<float>(i) + 1; \ |
|
\ |
|
if (i % 2 == 0) \ |
|
{ \ |
|
pW_next = var->weights.next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
pdu_next = dW_u->next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
pdv_next = dW_v->next_color.ptr<float>(i + 1) + next_offs_even; \ |
|
len = var->A11.cur_color##_even_len; \ |
|
} \ |
|
else \ |
|
{ \ |
|
pW_next = var->weights.next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
pdu_next = dW_u->next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
pdv_next = dW_v->next_color.ptr<float>(i + 1) + next_offs_odd; \ |
|
len = var->A11.cur_color##_odd_len; \ |
|
} |
|
if (red_pass) |
|
{ |
|
INIT_ROW_POINTERS(red, black, 1, 2); |
|
} |
|
else |
|
{ |
|
INIT_ROW_POINTERS(black, red, 2, 1); |
|
} |
|
#undef INIT_ROW_POINTERS |
|
|
|
j = 0; |
|
#if CV_SIMD128 |
|
v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]); |
|
v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]); |
|
v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]); |
|
v_float32x4 omega_vec = v_setall_f32(var->omega); |
|
v_float32x4 pW_vec, pW_next_vec, pW_prev_row_vec; |
|
v_float32x4 pdu_next_vec, pdu_prev_row_vec, pdu_next_row_vec; |
|
v_float32x4 pdv_next_vec, pdv_prev_row_vec, pdv_next_row_vec; |
|
v_float32x4 pW_shifted_vec, pdu_shifted_vec, pdv_shifted_vec; |
|
v_float32x4 pa12_vec, sigmaU_vec, sigmaV_vec, pdu_vec, pdv_vec; |
|
for (; j < len - 3; j += 4) |
|
{ |
|
pW_vec = v_load(pW + j); |
|
pW_next_vec = v_load(pW_next + j); |
|
pW_prev_row_vec = v_load(pW_prev_row + j); |
|
pdu_next_vec = v_load(pdu_next + j); |
|
pdu_prev_row_vec = v_load(pdu_prev_row + j); |
|
pdu_next_row_vec = v_load(pdu_next_row + j); |
|
pdv_next_vec = v_load(pdv_next + j); |
|
pdv_prev_row_vec = v_load(pdv_prev_row + j); |
|
pdv_next_row_vec = v_load(pdv_next_row + j); |
|
pa12_vec = v_load(pa12 + j); |
|
pW_shifted_vec = v_reinterpret_as_f32( |
|
v_extract<3>(v_reinterpret_as_s32(pW_prev_vec), v_reinterpret_as_s32(pW_next_vec))); |
|
pdu_shifted_vec = v_reinterpret_as_f32( |
|
v_extract<3>(v_reinterpret_as_s32(pdu_prev_vec), v_reinterpret_as_s32(pdu_next_vec))); |
|
pdv_shifted_vec = v_reinterpret_as_f32( |
|
v_extract<3>(v_reinterpret_as_s32(pdv_prev_vec), v_reinterpret_as_s32(pdv_next_vec))); |
|
|
|
sigmaU_vec = pW_shifted_vec * pdu_shifted_vec + pW_vec * pdu_next_vec + pW_prev_row_vec * pdu_prev_row_vec + |
|
pW_vec * pdu_next_row_vec; |
|
sigmaV_vec = pW_shifted_vec * pdv_shifted_vec + pW_vec * pdv_next_vec + pW_prev_row_vec * pdv_prev_row_vec + |
|
pW_vec * pdv_next_row_vec; |
|
|
|
pdu_vec = v_load(pdu + j); |
|
pdv_vec = v_load(pdv + j); |
|
pdu_vec += omega_vec * ((sigmaU_vec + v_load(pb1 + j) - pdv_vec * pa12_vec) / v_load(pa11 + j) - pdu_vec); |
|
pdv_vec += omega_vec * ((sigmaV_vec + v_load(pb2 + j) - pdu_vec * pa12_vec) / v_load(pa22 + j) - pdv_vec); |
|
v_store(pdu + j, pdu_vec); |
|
v_store(pdv + j, pdv_vec); |
|
|
|
pW_prev_vec = pW_next_vec; |
|
pdu_prev_vec = pdu_next_vec; |
|
pdv_prev_vec = pdv_next_vec; |
|
} |
|
#endif |
|
for (; j < len; j++) |
|
{ |
|
sigmaU = pW_next[j - 1] * pdu_next[j - 1] + pW[j] * pdu_next[j] + pW_prev_row[j] * pdu_prev_row[j] + |
|
pW[j] * pdu_next_row[j]; |
|
sigmaV = pW_next[j - 1] * pdv_next[j - 1] + pW[j] * pdv_next[j] + pW_prev_row[j] * pdv_prev_row[j] + |
|
pW[j] * pdv_next_row[j]; |
|
pdu[j] += var->omega * ((sigmaU + pb1[j] - pdv[j] * pa12[j]) / pa11[j] - pdu[j]); |
|
pdv[j] += var->omega * ((sigmaV + pb2[j] - pdu[j] * pa12[j]) / pa22[j] - pdv[j]); |
|
} |
|
} |
|
} |
|
|
|
void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
CV_Assert(!I0.empty() && I0.channels() == 1); |
|
CV_Assert(!I1.empty() && I1.channels() == 1); |
|
CV_Assert(I0.sameSize(I1)); |
|
CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F)); |
|
CV_Assert(!flow.empty() && flow.depth() == CV_32F && flow.channels() == 2); |
|
CV_Assert(I0.sameSize(flow)); |
|
|
|
Mat uv[2]; |
|
Mat &flowMat = flow.getMatRef(); |
|
split(flowMat, uv); |
|
calcUV(I0, I1, uv[0], uv[1]); |
|
merge(uv, 2, flowMat); |
|
} |
|
|
|
void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
CV_Assert(!I0.empty() && I0.channels() == 1); |
|
CV_Assert(!I1.empty() && I1.channels() == 1); |
|
CV_Assert(I0.sameSize(I1)); |
|
CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F)); |
|
CV_Assert(!flow_u.empty() && flow_u.depth() == CV_32F && flow_u.channels() == 1); |
|
CV_Assert(!flow_v.empty() && flow_v.depth() == CV_32F && flow_v.channels() == 1); |
|
CV_Assert(I0.sameSize(flow_u)); |
|
CV_Assert(flow_u.sameSize(flow_v)); |
|
|
|
int num_stripes = getNumThreads(); |
|
Mat I0Mat = I0.getMat(); |
|
Mat I1Mat = I1.getMat(); |
|
Mat &W_u = flow_u.getMatRef(); |
|
Mat &W_v = flow_v.getMatRef(); |
|
prepareBuffers(I0Mat, I1Mat, W_u, W_v); |
|
|
|
splitCheckerboard(W_u_rb, W_u); |
|
splitCheckerboard(W_v_rb, W_v); |
|
W_u_rb.red.copyTo(tempW_u.red); |
|
W_u_rb.black.copyTo(tempW_u.black); |
|
W_v_rb.red.copyTo(tempW_v.red); |
|
W_v_rb.black.copyTo(tempW_v.black); |
|
dW_u.red.setTo(0.0f); |
|
dW_u.black.setTo(0.0f); |
|
dW_v.red.setTo(0.0f); |
|
dW_v.black.setTo(0.0f); |
|
|
|
for (int i = 0; i < fixedPointIterations; i++) |
|
{ |
|
CV_TRACE_REGION("fixedPoint_iteration"); |
|
|
|
parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); |
|
parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); |
|
|
|
parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( |
|
*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, true)); |
|
parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( |
|
*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, false)); |
|
|
|
parallel_for_(Range(0, num_stripes), |
|
ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, true)); |
|
parallel_for_(Range(0, num_stripes), |
|
ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, false)); |
|
|
|
for (int j = 0; j < sorIterations; j++) |
|
{ |
|
CV_TRACE_REGION("SOR_iteration"); |
|
parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); |
|
parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); |
|
} |
|
|
|
tempW_u.red = W_u_rb.red + dW_u.red; |
|
tempW_u.black = W_u_rb.black + dW_u.black; |
|
updateRepeatedBorders(tempW_u); |
|
tempW_v.red = W_v_rb.red + dW_v.red; |
|
tempW_v.black = W_v_rb.black + dW_v.black; |
|
updateRepeatedBorders(tempW_v); |
|
} |
|
mergeCheckerboard(W_u, tempW_u); |
|
mergeCheckerboard(W_v, tempW_v); |
|
} |
|
void VariationalRefinementImpl::collectGarbage() |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
Ix.release(); |
|
Iy.release(); |
|
Iz.release(); |
|
Ixx.release(); |
|
Ixy.release(); |
|
Iyy.release(); |
|
Ixz.release(); |
|
Iyz.release(); |
|
|
|
Ix_rb.release(); |
|
Iy_rb.release(); |
|
Iz_rb.release(); |
|
Ixx_rb.release(); |
|
Ixy_rb.release(); |
|
Iyy_rb.release(); |
|
Ixz_rb.release(); |
|
Iyz_rb.release(); |
|
|
|
A11.release(); |
|
A12.release(); |
|
A22.release(); |
|
b1.release(); |
|
b2.release(); |
|
weights.release(); |
|
|
|
mapX.release(); |
|
mapY.release(); |
|
|
|
tempW_u.release(); |
|
tempW_v.release(); |
|
dW_u.release(); |
|
dW_v.release(); |
|
W_u_rb.release(); |
|
W_v_rb.release(); |
|
} |
|
|
|
Ptr<VariationalRefinement> VariationalRefinement::create() |
|
{ return makePtr<VariationalRefinementImpl>(); } |
|
|
|
}
|
|
|