Merge pull request #2698 from DumDereDum:new_HashTSDF_implementation
New HashTSDF implementation * create new variables * rewrite reset() * first valid version of new HasHTSDF * some warning fixes * create lambda raycast * reduce time raycast * minor fix * minor fix volDims * changed _atVolumeUnit, reduce memory consumption * delete older inmplemetation of atVolumeUnit * changes _at * AAA, I want to cry! * it works! * it works twice o_o * minor fix * new adding to volumes * delete volDims at strust VolumeUnit * new names of vars * rename one var * minor fix * new resize volumes * rename volUnitsMatrix * minor fix in at function * add tsdf_functions.hpp * minor fix * remove two args at _at function signature * solved the link problem with tsdf_functions * build fix * build fix 1 * build fix 2 * build fix 3 * build fix 4 * replace integrateVolumeUnit to tsdf_functions and fix 2 warnings * docs fix * remove extra args at atVolumeUnit signature * change frame params checking * move volStrides to CPU class * inline convertion functions in tsdf_functions * minor fix * add SIMD version of integrateVolumeUnit * fix something :) * docs fix * warning fix * add degub asserts * replace vars initialization with reset() * remove volDims var * new resize buffer * minor vars name fix * docs fix * warning fix * minor fix * minor fix 1 * remove dbg asserts Co-authored-by: arsaratovtsev <artem.saratovtsev@intel.com>pull/2727/head
parent
7022f4e3e0
commit
0b5ded4637
5 changed files with 674 additions and 345 deletions
@ -0,0 +1,377 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
|
||||
|
||||
#include "precomp.hpp" |
||||
#include "tsdf_functions.hpp" |
||||
|
||||
namespace cv { |
||||
|
||||
namespace kinfu { |
||||
|
||||
cv::Mat preCalculationPixNorm(Depth depth, const Intr& intrinsics) |
||||
{ |
||||
int height = depth.rows; |
||||
int widht = depth.cols; |
||||
Point2f fl(intrinsics.fx, intrinsics.fy); |
||||
Point2f pp(intrinsics.cx, intrinsics.cy); |
||||
Mat pixNorm(height, widht, CV_32F); |
||||
std::vector<float> x(widht); |
||||
std::vector<float> y(height); |
||||
for (int i = 0; i < widht; i++) |
||||
x[i] = (i - pp.x) / fl.x; |
||||
for (int i = 0; i < height; i++) |
||||
y[i] = (i - pp.y) / fl.y; |
||||
|
||||
for (int i = 0; i < height; i++) |
||||
{ |
||||
for (int j = 0; j < widht; j++) |
||||
{ |
||||
pixNorm.at<float>(i, j) = sqrtf(x[j] * x[j] + y[i] * y[i] + 1.0f); |
||||
} |
||||
} |
||||
return pixNorm; |
||||
} |
||||
|
||||
const bool fixMissingData = false; |
||||
depthType bilinearDepth(const Depth& m, cv::Point2f pt) |
||||
{ |
||||
const depthType defaultValue = qnan; |
||||
if (pt.x < 0 || pt.x >= m.cols - 1 || |
||||
pt.y < 0 || pt.y >= m.rows - 1) |
||||
return defaultValue; |
||||
|
||||
int xi = cvFloor(pt.x), yi = cvFloor(pt.y); |
||||
|
||||
const depthType* row0 = m[yi + 0]; |
||||
const depthType* row1 = m[yi + 1]; |
||||
|
||||
depthType v00 = row0[xi + 0]; |
||||
depthType v01 = row0[xi + 1]; |
||||
depthType v10 = row1[xi + 0]; |
||||
depthType v11 = row1[xi + 1]; |
||||
|
||||
// assume correct depth is positive
|
||||
bool b00 = v00 > 0; |
||||
bool b01 = v01 > 0; |
||||
bool b10 = v10 > 0; |
||||
bool b11 = v11 > 0; |
||||
|
||||
if (!fixMissingData) |
||||
{ |
||||
if (!(b00 && b01 && b10 && b11)) |
||||
return defaultValue; |
||||
else |
||||
{ |
||||
float tx = pt.x - xi, ty = pt.y - yi; |
||||
depthType v0 = v00 + tx * (v01 - v00); |
||||
depthType v1 = v10 + tx * (v11 - v10); |
||||
return v0 + ty * (v1 - v0); |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
int nz = b00 + b01 + b10 + b11; |
||||
if (nz == 0) |
||||
{ |
||||
return defaultValue; |
||||
} |
||||
if (nz == 1) |
||||
{ |
||||
if (b00) return v00; |
||||
if (b01) return v01; |
||||
if (b10) return v10; |
||||
if (b11) return v11; |
||||
} |
||||
if (nz == 2) |
||||
{ |
||||
if (b00 && b10) v01 = v00, v11 = v10; |
||||
if (b01 && b11) v00 = v01, v10 = v11; |
||||
if (b00 && b01) v10 = v00, v11 = v01; |
||||
if (b10 && b11) v00 = v10, v01 = v11; |
||||
if (b00 && b11) v01 = v10 = (v00 + v11) * 0.5f; |
||||
if (b01 && b10) v00 = v11 = (v01 + v10) * 0.5f; |
||||
} |
||||
if (nz == 3) |
||||
{ |
||||
if (!b00) v00 = v10 + v01 - v11; |
||||
if (!b01) v01 = v00 + v11 - v10; |
||||
if (!b10) v10 = v00 + v11 - v01; |
||||
if (!b11) v11 = v01 + v10 - v00; |
||||
} |
||||
|
||||
float tx = pt.x - xi, ty = pt.y - yi; |
||||
depthType v0 = v00 + tx * (v01 - v00); |
||||
depthType v1 = v10 + tx * (v11 - v10); |
||||
return v0 + ty * (v1 - v0); |
||||
} |
||||
} |
||||
|
||||
void integrateVolumeUnit( |
||||
float truncDist, float voxelSize, int maxWeight, |
||||
cv::Matx44f _pose, int volResolution, Vec4i volStrides, |
||||
InputArray _depth, float depthFactor, const cv::Matx44f& cameraPose, |
||||
const cv::kinfu::Intr& intrinsics, InputArray _pixNorms, InputArray _volume) |
||||
{ |
||||
CV_TRACE_FUNCTION(); |
||||
|
||||
CV_Assert(_depth.type() == DEPTH_TYPE); |
||||
CV_Assert(!_depth.empty()); |
||||
cv::Affine3f vpose(_pose); |
||||
Depth depth = _depth.getMat(); |
||||
|
||||
Range integrateRange(0, volResolution); |
||||
|
||||
Mat volume = _volume.getMat(); |
||||
Mat pixNorms = _pixNorms.getMat(); |
||||
const Intr::Projector proj(intrinsics.makeProjector()); |
||||
const cv::Affine3f vol2cam(Affine3f(cameraPose.inv()) * vpose); |
||||
const float truncDistInv(1.f / truncDist); |
||||
const float dfac(1.f / depthFactor); |
||||
TsdfVoxel* volDataStart = volume.ptr<TsdfVoxel>();; |
||||
|
||||
#if USE_INTRINSICS |
||||
auto IntegrateInvoker = [&](const Range& range) |
||||
{ |
||||
// zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
|
||||
Point3f zStepPt = Point3f(vol2cam.matrix(0, 2), |
||||
vol2cam.matrix(1, 2), |
||||
vol2cam.matrix(2, 2)) * voxelSize; |
||||
|
||||
v_float32x4 zStep(zStepPt.x, zStepPt.y, zStepPt.z, 0); |
||||
v_float32x4 vfxy(proj.fx, proj.fy, 0.f, 0.f), vcxy(proj.cx, proj.cy, 0.f, 0.f); |
||||
const v_float32x4 upLimits = v_cvt_f32(v_int32x4(depth.cols - 1, depth.rows - 1, 0, 0)); |
||||
|
||||
for (int x = range.start; x < range.end; x++) |
||||
{ |
||||
TsdfVoxel* volDataX = volDataStart + x * volStrides[0]; |
||||
for (int y = 0; y < volResolution; y++) |
||||
{ |
||||
TsdfVoxel* volDataY = volDataX + y * volStrides[1]; |
||||
// optimization of camSpace transformation (vector addition instead of matmul at each z)
|
||||
Point3f basePt = vol2cam * (Point3f((float)x, (float)y, 0) * voxelSize); |
||||
v_float32x4 camSpacePt(basePt.x, basePt.y, basePt.z, 0); |
||||
|
||||
int startZ, endZ; |
||||
if (abs(zStepPt.z) > 1e-5) |
||||
{ |
||||
int baseZ = (int)(-basePt.z / zStepPt.z); |
||||
if (zStepPt.z > 0) |
||||
{ |
||||
startZ = baseZ; |
||||
endZ = volResolution; |
||||
} |
||||
else |
||||
{ |
||||
startZ = 0; |
||||
endZ = baseZ; |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
if (basePt.z > 0) |
||||
{ |
||||
startZ = 0; |
||||
endZ = volResolution; |
||||
} |
||||
else |
||||
{ |
||||
// z loop shouldn't be performed
|
||||
startZ = endZ = 0; |
||||
} |
||||
} |
||||
startZ = max(0, startZ); |
||||
endZ = min(int(volResolution), endZ); |
||||
for (int z = startZ; z < endZ; z++) |
||||
{ |
||||
// optimization of the following:
|
||||
//Point3f volPt = Point3f(x, y, z)*voxelSize;
|
||||
//Point3f camSpacePt = vol2cam * volPt;
|
||||
camSpacePt += zStep; |
||||
|
||||
float zCamSpace = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(camSpacePt))).get0(); |
||||
if (zCamSpace <= 0.f) |
||||
continue; |
||||
|
||||
v_float32x4 camPixVec = camSpacePt / v_setall_f32(zCamSpace); |
||||
v_float32x4 projected = v_muladd(camPixVec, vfxy, vcxy); |
||||
// leave only first 2 lanes
|
||||
projected = v_reinterpret_as_f32(v_reinterpret_as_u32(projected) & |
||||
v_uint32x4(0xFFFFFFFF, 0xFFFFFFFF, 0, 0)); |
||||
|
||||
depthType v; |
||||
// bilinearly interpolate depth at projected
|
||||
{ |
||||
const v_float32x4& pt = projected; |
||||
// check coords >= 0 and < imgSize
|
||||
v_uint32x4 limits = v_reinterpret_as_u32(pt < v_setzero_f32()) | |
||||
v_reinterpret_as_u32(pt >= upLimits); |
||||
limits = limits | v_rotate_right<1>(limits); |
||||
if (limits.get0()) |
||||
continue; |
||||
|
||||
// xi, yi = floor(pt)
|
||||
v_int32x4 ip = v_floor(pt); |
||||
v_int32x4 ipshift = ip; |
||||
int xi = ipshift.get0(); |
||||
ipshift = v_rotate_right<1>(ipshift); |
||||
int yi = ipshift.get0(); |
||||
|
||||
const depthType* row0 = depth[yi + 0]; |
||||
const depthType* row1 = depth[yi + 1]; |
||||
|
||||
// v001 = [v(xi + 0, yi + 0), v(xi + 1, yi + 0)]
|
||||
v_float32x4 v001 = v_load_low(row0 + xi); |
||||
// v101 = [v(xi + 0, yi + 1), v(xi + 1, yi + 1)]
|
||||
v_float32x4 v101 = v_load_low(row1 + xi); |
||||
|
||||
v_float32x4 vall = v_combine_low(v001, v101); |
||||
|
||||
// assume correct depth is positive
|
||||
// don't fix missing data
|
||||
if (v_check_all(vall > v_setzero_f32())) |
||||
{ |
||||
v_float32x4 t = pt - v_cvt_f32(ip); |
||||
float tx = t.get0(); |
||||
t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t))); |
||||
v_float32x4 ty = v_setall_f32(t.get0()); |
||||
// vx is y-interpolated between rows 0 and 1
|
||||
v_float32x4 vx = v001 + ty * (v101 - v001); |
||||
float v0 = vx.get0(); |
||||
vx = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vx))); |
||||
float v1 = vx.get0(); |
||||
v = v0 + tx * (v1 - v0); |
||||
} |
||||
else |
||||
continue; |
||||
} |
||||
|
||||
// norm(camPixVec) produces double which is too slow
|
||||
int _u = (int)projected.get0(); |
||||
int _v = (int)v_rotate_right<1>(projected).get0(); |
||||
if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows)) |
||||
continue; |
||||
float pixNorm = pixNorms.at<float>(_v, _u); |
||||
// float pixNorm = sqrt(v_reduce_sum(camPixVec*camPixVec));
|
||||
// difference between distances of point and of surface to camera
|
||||
float sdf = pixNorm * (v * dfac - zCamSpace); |
||||
// possible alternative is:
|
||||
// kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
|
||||
if (sdf >= -truncDist) |
||||
{ |
||||
TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv)); |
||||
|
||||
TsdfVoxel& voxel = volDataY[z * volStrides[2]]; |
||||
WeightType& weight = voxel.weight; |
||||
TsdfType& value = voxel.tsdf; |
||||
|
||||
// update TSDF
|
||||
value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1)); |
||||
weight = (weight + 1) < maxWeight ? (weight + 1) : (WeightType) maxWeight; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
}; |
||||
#else |
||||
auto IntegrateInvoker = [&](const Range& range) |
||||
{ |
||||
for (int x = range.start; x < range.end; x++) |
||||
{ |
||||
TsdfVoxel* volDataX = volDataStart + x * volStrides[0]; |
||||
for (int y = 0; y < volResolution; y++) |
||||
{ |
||||
TsdfVoxel* volDataY = volDataX + y * volStrides[1]; |
||||
// optimization of camSpace transformation (vector addition instead of matmul at each z)
|
||||
Point3f basePt = vol2cam * (Point3f(float(x), float(y), 0.0f) * voxelSize); |
||||
Point3f camSpacePt = basePt; |
||||
// zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
|
||||
// zStep == vol2cam*[Point3f(x, y, 1) - Point3f(x, y, 0)]*voxelSize
|
||||
Point3f zStep = Point3f(vol2cam.matrix(0, 2), |
||||
vol2cam.matrix(1, 2), |
||||
vol2cam.matrix(2, 2)) * voxelSize; |
||||
int startZ, endZ; |
||||
if (abs(zStep.z) > 1e-5) |
||||
{ |
||||
int baseZ = int(-basePt.z / zStep.z); |
||||
if (zStep.z > 0) |
||||
{ |
||||
startZ = baseZ; |
||||
endZ = volResolution; |
||||
} |
||||
else |
||||
{ |
||||
startZ = 0; |
||||
endZ = baseZ; |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
if (basePt.z > 0) |
||||
{ |
||||
startZ = 0; |
||||
endZ = volResolution; |
||||
} |
||||
else |
||||
{ |
||||
// z loop shouldn't be performed
|
||||
startZ = endZ = 0; |
||||
} |
||||
} |
||||
startZ = max(0, startZ); |
||||
endZ = min(int(volResolution), endZ); |
||||
|
||||
for (int z = startZ; z < endZ; z++) |
||||
{ |
||||
// optimization of the following:
|
||||
//Point3f volPt = Point3f(x, y, z)*volume.voxelSize;
|
||||
//Point3f camSpacePt = vol2cam * volPt;
|
||||
|
||||
camSpacePt += zStep; |
||||
if (camSpacePt.z <= 0) |
||||
continue; |
||||
|
||||
Point3f camPixVec; |
||||
Point2f projected = proj(camSpacePt, camPixVec); |
||||
|
||||
depthType v = bilinearDepth(depth, projected); |
||||
if (v == 0) { |
||||
continue; |
||||
} |
||||
|
||||
int _u = projected.x; |
||||
int _v = projected.y; |
||||
if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows)) |
||||
continue; |
||||
float pixNorm = pixNorms.at<float>(_v, _u); |
||||
|
||||
// difference between distances of point and of surface to camera
|
||||
float sdf = pixNorm * (v * dfac - camSpacePt.z); |
||||
// possible alternative is:
|
||||
// kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
|
||||
if (sdf >= -truncDist) |
||||
{ |
||||
TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv)); |
||||
|
||||
TsdfVoxel& voxel = volDataY[z * volStrides[2]]; |
||||
WeightType& weight = voxel.weight; |
||||
TsdfType& value = voxel.tsdf; |
||||
|
||||
// update TSDF
|
||||
value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1)); |
||||
weight = min(int(weight + 1), int(maxWeight)); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
}; |
||||
#endif |
||||
|
||||
parallel_for_(integrateRange, IntegrateInvoker); |
||||
|
||||
} |
||||
|
||||
} // namespace kinfu
|
||||
} // namespace cv
|
@ -0,0 +1,48 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
|
||||
|
||||
#ifndef __OPENCV_TSDF_FUNCTIONS_H__ |
||||
#define __OPENCV_TSDF_FUNCTIONS_H__ |
||||
|
||||
#include <opencv2/rgbd/volume.hpp> |
||||
#include "tsdf.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace kinfu |
||||
{ |
||||
|
||||
inline v_float32x4 tsdfToFloat_INTR(const v_int32x4& num) |
||||
{ |
||||
v_float32x4 num128 = v_setall_f32(-1.f / 128.f); |
||||
return v_cvt_f32(num) * num128; |
||||
} |
||||
|
||||
inline TsdfType floatToTsdf(float num) |
||||
{ |
||||
//CV_Assert(-1 < num <= 1);
|
||||
int8_t res = int8_t(num * (-128.f)); |
||||
res = res ? res : (num < 0 ? 1 : -1); |
||||
return res; |
||||
} |
||||
|
||||
inline float tsdfToFloat(TsdfType num) |
||||
{ |
||||
return float(num) * (-1.f / 128.f); |
||||
} |
||||
|
||||
cv::Mat preCalculationPixNorm(Depth depth, const Intr& intrinsics); |
||||
depthType bilinearDepth(const Depth& m, cv::Point2f pt); |
||||
|
||||
void integrateVolumeUnit( |
||||
float truncDist, float voxelSize, int maxWeight, |
||||
cv::Matx44f _pose, int volResolution, Vec4i volStrides, |
||||
InputArray _depth, float depthFactor, const cv::Matx44f& cameraPose, |
||||
const cv::kinfu::Intr& intrinsics, InputArray _pixNorms, InputArray _volume); |
||||
|
||||
} // namespace kinfu
|
||||
} // namespace cv
|
||||
#endif |
Loading…
Reference in new issue