KinectFusion big update: OpenCL support, etc. (#1798)

* KinFu demo: idle mode added

* undistort added

* KinFu demo: depth recorder added

* TSDFVolume gets voxelSize, voxelSizeInv, truncDist members; decorative fixes

* TSDFVolumeGPU::integrate(): host code compiles

* TSDFVolume: truncDist fixed

* TSDFVolume::integrate(): initial OCL version is done

* TSDFVolume::integrate(): OCL: minor fixes

* kinfu: small fixes

* TSDFVolume::raycast(): initial GPU version is done

* USE_INTRINSICS directive for centralized enable/disable opt. code

* TSDF Volume supports 3 different sizes/resolutions on each dimension

* TSDFVolume: serviceMembers moved to parent class

* TSDFVolumeGPU: mem order changed, gave perf boost 4x

* Frame: fixed UMat as InputArray; TSDF: comments, TODOs, minor fixes

* Frame::getPointsNormals(); minors

* FrameGPU: initial version (not working)

* minor

* FrameGPU: several fixes

* added OCL "fast" options

* ICP OCL: initial slow version is done (host-side reduce)

* ICP OCL: reduce is done, buggy

* KinFu Frame: more args fixes

* ICP OCL: small fixes to kernel

* ICP OCL reduce works

* OCL code refactored

* ICP OCL: less allocations, better speed

* ICP OCL: pose matrix made arg

* Render OCL: small fix

* Demo: using UMats everywhere

* TSDF integrate OCL: put const arg into kernel arg

* Platform parameter partially removed, implementation choice is done through OCL availability check

* Frame and FrameGenerator removed (other code is in following commits)

* CPU render: 4b instead of 3b

* ICP: no Frame class use, one class for both CPU and GPU code

* demo: fix for UMat chain

* TSDF: no Frame or FrameGenerator use

* KinFu: no Frame or FrameGenerator, parametrized for Mat or UMat based on OCL availability

* KinFu::setParams() removed since it has no effect anyway

* TSDF::raycast OCL: fixed normals rendering

* ScopeTime -> CV_TRACE

* 3-dims resolution and size passed to API

* fixed unexpected fails of ICP OCL

* voxels made cubic again

* args fixed a little

* fixed volSize calculation

* Tests: inequal, OCL, unified scene test function

* kinfu_frame: input types fixed

* fixed for Intel HD Graphics

* KinFu demo: setUseOptimized instead of setUseOpenCL

* tsdf: data types fixed

* TSDF OCL: fetch normals implemented

* roundDownPow2 -> utils.hpp

* TSDF OCL: fetch points+normals implemented

* TSDF OCL: NoSize, other fixes for kernel args

* Frame OCL: HAVE_OPENCL, NoSize, other kernel args fixed

* ICP OCL: HAVE_OPENCL, NoSize and other kernel fixes

* KinFu demo fixes: volume size and too long delay

* whitespace fix

* TSDF: allowed sizes not divisable by 32

* TSDF: fixed type traits; added optimization TODOs

* KinFu made non-free

* minor fixes: cast and whitespace

* fixed FastICP test

* warnings fixed: implicit type conversions

* OCL kernels: local args made through KernelArg::Local(lsz) call

* MSVC warnings fixed

* a workaround for broken OCV's bilateral

* KinFu tests made a bit faster

* build fixed until 3.4 isn't merged to master

* warnings fixed, test time shortened
pull/1822/head
Rostislav Vasilikhin 7 years ago committed by Vadim Pisarevsky
parent 2f014f6f40
commit 75bcd397af
  1. 2
      modules/rgbd/README.md
  2. 52
      modules/rgbd/include/opencv2/rgbd/kinfu.hpp
  3. 261
      modules/rgbd/samples/kinfu_demo.cpp
  4. 254
      modules/rgbd/src/fast_icp.cpp
  5. 8
      modules/rgbd/src/fast_icp.hpp
  6. 234
      modules/rgbd/src/kinfu.cpp
  7. 537
      modules/rgbd/src/kinfu_frame.cpp
  8. 54
      modules/rgbd/src/kinfu_frame.hpp
  9. 31
      modules/rgbd/src/odometry.cpp
  10. 239
      modules/rgbd/src/opencl/icp.cl
  11. 287
      modules/rgbd/src/opencl/kinfu_frame.cl
  12. 825
      modules/rgbd/src/opencl/tsdf.cl
  13. 2
      modules/rgbd/src/precomp.hpp
  14. 703
      modules/rgbd/src/tsdf.cpp
  15. 24
      modules/rgbd/src/tsdf.hpp
  16. 32
      modules/rgbd/src/utils.cpp
  17. 31
      modules/rgbd/src/utils.hpp
  18. 113
      modules/rgbd/test/test_kinfu.cpp
  19. 7
      modules/rgbd/test/test_odometry.cpp
  20. 4
      modules/rgbd/test/test_precomp.hpp

@ -15,3 +15,5 @@ Note that the KinectFusion algorithm was patented and its use may be restricted
* _US8401225B2_ Moving object segmentation using depth images
Since OpenCV's license imposes different restrictions on usage please consult a legal before using this algorithm any way.
That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion.

@ -28,19 +28,6 @@ struct CV_EXPORTS_W Params
*/
CV_WRAP static Ptr<Params> coarseParams();
enum PlatformType
{
PLATFORM_CPU, PLATFORM_GPU
};
/** @brief A platform on which to run the algorithm.
*
Currently KinFu supports only one platform which is a CPU.
GPU platform is to be implemented in the future.
*/
PlatformType platform;
/** @brief frame size in pixels */
CV_PROP_RW Size frameSize;
@ -50,8 +37,9 @@ struct CV_EXPORTS_W Params
/** @brief pre-scale per 1 meter for input values
Typical values are:
* 5000 per 1 meter for the 16-bit PNG files of TUM database
* 1 per 1 meter for the 32-bit float images in the ROS bag files
* 5000 per 1 meter for the 16-bit PNG files of TUM database
* 1000 per 1 meter for Kinect 2 device
* 1 per 1 meter for the 32-bit float images in the ROS bag files
*/
CV_PROP_RW float depthFactor;
@ -65,13 +53,13 @@ struct CV_EXPORTS_W Params
/** @brief Number of pyramid levels for ICP */
CV_PROP_RW int pyramidLevels;
/** @brief Resolution of voxel cube
/** @brief Resolution of voxel space
Number of voxels in each cube edge.
Number of voxels in each dimension.
*/
CV_PROP_RW int volumeDims;
/** @brief Size of voxel cube side in meters */
CV_PROP_RW float volumeSize;
CV_PROP_RW Vec3i volumeDims;
/** @brief Size of voxel in meters */
CV_PROP_RW float voxelSize;
/** @brief Minimal camera movement in meters
@ -84,7 +72,7 @@ struct CV_EXPORTS_W Params
/** @brief distance to truncate in meters
Distances that exceed this value will be truncated in voxel cube values.
Distances to surface that exceed this value will be truncated to 1.0.
*/
CV_PROP_RW float tsdf_trunc_dist;
@ -128,11 +116,19 @@ struct CV_EXPORTS_W Params
The output can be obtained as a vector of points and their normals
or can be Phong-rendered from given camera pose.
An internal representation of a model is a voxel cube that keeps TSDF values
An internal representation of a model is a voxel cuboid that keeps TSDF values
which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF).
There is no interface to that representation yet.
This implementation is based on (kinfu-remake)[https://github.com/Nerei/kinfu_remake].
KinFu uses OpenCL acceleration automatically if available.
To enable or disable it explicitly use cv::setUseOptimized() or cv::ocl::setUseOpenCL().
This implementation is based on [kinfu-remake](https://github.com/Nerei/kinfu_remake).
Note that the KinectFusion algorithm was patented and its use may be restricted by
the list of patents mentioned in README.md file in this module directory.
That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion.
*/
class CV_EXPORTS_W KinFu
{
@ -142,11 +138,10 @@ public:
/** @brief Get current parameters */
virtual const Params& getParams() const = 0;
virtual void setParams(const Params&) = 0;
/** @brief Renders a volume into an image
Renders a 0-surface of TSDF using Phong shading into a CV_8UC3 Mat.
Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
Light pose is fixed in KinFu params.
@param image resulting image
@ -186,21 +181,18 @@ public:
*/
CV_WRAP virtual void reset() = 0;
/** @brief Get current pose in voxel cube space */
/** @brief Get current pose in voxel space */
virtual const Affine3f getPose() const = 0;
/** @brief Process next depth frame
Integrates depth into voxel cube with respect to its ICP-calculated pose.
Integrates depth into voxel space with respect to its ICP-calculated pose.
Input image is converted to CV_32F internally if has another type.
@param depth one-channel image which size and depth scale is described in algorithm's parameters
@return true if succeded to align new frame with current scene, false if opposite
*/
CV_WRAP virtual bool update(InputArray depth) = 0;
private:
class KinFuImpl;
};
//! @}

@ -26,7 +26,7 @@ static vector<string> readDepth(std::string fileList)
fstream file(fileList);
if(!file.is_open())
throw std::runtime_error("Failed to open file");
throw std::runtime_error("Failed to read depth list");
std::string dir;
size_t slashIdx = fileList.rfind('/');
@ -48,47 +48,86 @@ static vector<string> readDepth(std::string fileList)
return v;
}
struct DepthWriter
{
DepthWriter(string fileList) :
file(fileList, ios::out), count(0), dir()
{
size_t slashIdx = fileList.rfind('/');
slashIdx = slashIdx != std::string::npos ? slashIdx : fileList.rfind('\\');
dir = fileList.substr(0, slashIdx);
if(!file.is_open())
throw std::runtime_error("Failed to write depth list");
file << "# depth maps saved from device" << endl;
file << "# useless_number filename" << endl;
}
void append(InputArray _depth)
{
Mat depth = _depth.getMat();
string depthFname = cv::format("%04d.png", count);
string fullDepthFname = dir + '/' + depthFname;
if(!imwrite(fullDepthFname, depth))
throw std::runtime_error("Failed to write depth to file " + fullDepthFname);
file << count++ << " " << depthFname << endl;
}
fstream file;
int count;
string dir;
};
const Size kinect2FrameSize(512, 424);
// approximate values, no guarantee to be correct
const float kinect2Focal = 366.1f;
const float kinect2Cx = 258.2f;
const float kinect2Cy = 204.f;
namespace Kinect2Params
{
static const Size frameSize = Size(512, 424);
// approximate values, no guarantee to be correct
static const float focal = 366.1f;
static const float cx = 258.2f;
static const float cy = 204.f;
static const float k1 = 0.12f;
static const float k2 = -0.34f;
static const float k3 = 0.12f;
};
struct DepthSource
{
public:
DepthSource() :
depthFileList(),
frameIdx(0),
vc(),
useKinect2Workarounds(true)
DepthSource("", -1)
{ }
DepthSource(int cam) :
depthFileList(),
frameIdx(),
vc(VideoCaptureAPIs::CAP_OPENNI2 + cam),
useKinect2Workarounds(true)
DepthSource("", cam)
{ }
DepthSource(String fileListName) :
depthFileList(readDepth(fileListName)),
DepthSource(fileListName, -1)
{ }
DepthSource(String fileListName, int cam) :
depthFileList(fileListName.empty() ? vector<string>() : readDepth(fileListName)),
frameIdx(0),
vc(),
vc( cam >= 0 ? VideoCapture(VideoCaptureAPIs::CAP_OPENNI2 + cam) : VideoCapture()),
undistortMap1(),
undistortMap2(),
useKinect2Workarounds(true)
{ }
Mat getDepth()
UMat getDepth()
{
Mat out;
UMat out;
if (!vc.isOpened())
{
if (frameIdx < depthFileList.size())
out = cv::imread(depthFileList[frameIdx++], IMREAD_ANYDEPTH);
{
Mat f = cv::imread(depthFileList[frameIdx++], IMREAD_ANYDEPTH);
f.copyTo(out);
}
else
{
return Mat();
return UMat();
}
}
else
@ -99,8 +138,14 @@ public:
// workaround for Kinect 2
if(useKinect2Workarounds)
{
out = out(Rect(Point(), kinect2FrameSize));
cv::flip(out, out, 1);
out = out(Rect(Point(), Kinect2Params::frameSize));
UMat outCopy;
// linear remap adds gradient between valid and invalid pixels
// which causes garbage, use nearest instead
remap(out, outCopy, undistortMap1, undistortMap2, cv::INTER_NEAREST);
cv::flip(outCopy, out, 1);
}
}
if (out.empty())
@ -125,21 +170,47 @@ public:
// it's recommended to calibrate sensor to obtain its intrinsics
float fx, fy, cx, cy;
fx = fy = useKinect2Workarounds ? kinect2Focal : focal;
cx = useKinect2Workarounds ? kinect2Cx : w/2 - 0.5f;
cy = useKinect2Workarounds ? kinect2Cy : h/2 - 0.5f;
params.frameSize = useKinect2Workarounds ? kinect2FrameSize : Size(w, h);
params.intr = Matx33f(fx, 0, cx,
0, fy, cy,
0, 0, 1);
Size frameSize;
if(useKinect2Workarounds)
{
fx = fy = Kinect2Params::focal;
cx = Kinect2Params::cx;
cy = Kinect2Params::cy;
frameSize = Kinect2Params::frameSize;
}
else
{
fx = fy = focal;
cx = w/2 - 0.5f;
cy = h/2 - 0.5f;
frameSize = Size(w, h);
}
Matx33f camMatrix = Matx33f(fx, 0, cx,
0, fy, cy,
0, 0, 1);
params.frameSize = frameSize;
params.intr = camMatrix;
params.depthFactor = 1000.f;
Matx<float, 1, 5> distCoeffs;
distCoeffs(0) = Kinect2Params::k1;
distCoeffs(1) = Kinect2Params::k2;
distCoeffs(4) = Kinect2Params::k3;
if(useKinect2Workarounds)
initUndistortRectifyMap(camMatrix, distCoeffs, cv::noArray(),
camMatrix, frameSize, CV_16SC2,
undistortMap1, undistortMap2);
}
}
vector<string> depthFileList;
size_t frameIdx;
VideoCapture vc;
UMat undistortMap1, undistortMap2;
bool useKinect2Workarounds;
};
@ -163,7 +234,7 @@ void pauseCallback(const viz::MouseEvent& me, void* args)
{
PauseCallbackArgs pca = *((PauseCallbackArgs*)(args));
viz::Viz3d window(vizWindowName);
Mat rendered;
UMat rendered;
pca.kf.render(rendered, window.getViewerPose().matrix);
imshow("render", rendered);
waitKey(1);
@ -174,10 +245,13 @@ void pauseCallback(const viz::MouseEvent& me, void* args)
static const char* keys =
{
"{help h usage ? | | print this message }"
"{depth | | Path to depth.txt file listing a set of depth images }"
"{depth | | Path to depth.txt file listing a set of depth images }"
"{camera | | Index of depth camera to be used as a depth source }"
"{coarse | | Run on coarse settings (fast but ugly) or on default (slow but looks better),"
" in coarse mode points and normals are displayed }"
"{idle | | Do not run KinFu, just display depth frames }"
"{record | | Write depth frames to specified file list"
" (the same format as for the 'depth' key) }"
};
static const std::string message =
@ -189,9 +263,19 @@ static const std::string message =
int main(int argc, char **argv)
{
bool coarse = false;
bool idle = false;
string recordPath;
CommandLineParser parser(argc, argv, keys);
parser.about(message);
if(!parser.check())
{
parser.printMessage();
parser.printErrors();
return -1;
}
if(parser.has("help"))
{
parser.printMessage();
@ -201,12 +285,13 @@ int main(int argc, char **argv)
{
coarse = true;
}
if(!parser.check())
if(parser.has("record"))
{
parser.printMessage();
parser.printErrors();
return -1;
recordPath = parser.get<String>("record");
}
if(parser.has("idle"))
{
idle = true;
}
DepthSource ds;
@ -222,7 +307,13 @@ int main(int argc, char **argv)
return -1;
}
Ptr<DepthWriter> depthWriter;
if(!recordPath.empty())
depthWriter = makePtr<DepthWriter>(recordPath);
Ptr<Params> params;
Ptr<KinFu> kf;
if(coarse)
params = Params::coarseParams();
else
@ -231,11 +322,15 @@ int main(int argc, char **argv)
// These params can be different for each depth sensor
ds.updateParams(*params);
// Enables OpenCL explicitly (by default can be switched-off)
cv::setUseOptimized(true);
// Scene-specific params should be tuned for each scene individually
//params.volumePose = params.volumePose.translate(Vec3f(0.f, 0.f, 0.5f));
//params.tsdf_max_weight = 16;
//params->volumePose = params->volumePose.translate(Vec3f(0.f, 0.f, 0.5f));
//params->tsdf_max_weight = 16;
Ptr<KinFu> kf = KinFu::create(params);
if(!idle)
kf = KinFu::create(params);
#ifdef HAVE_OPENCV_VIZ
cv::viz::Viz3d window(vizWindowName);
@ -243,18 +338,21 @@ int main(int argc, char **argv)
bool pause = false;
#endif
// TODO: can we use UMats for that?
Mat rendered;
Mat points;
Mat normals;
UMat rendered;
UMat points;
UMat normals;
int64 prevTime = getTickCount();
for(Mat frame = ds.getDepth(); !frame.empty(); frame = ds.getDepth())
for(UMat frame = ds.getDepth(); !frame.empty(); frame = ds.getDepth())
{
if(depthWriter)
depthWriter->append(frame);
#ifdef HAVE_OPENCV_VIZ
if(pause)
{
// doesn't happen in idle mode
kf->getCloud(points, normals);
if(!points.empty() && !normals.empty())
{
@ -263,8 +361,9 @@ int main(int argc, char **argv)
window.showWidget("cloud", cloudWidget);
window.showWidget("normals", cloudNormals);
Vec3d volSize = kf->getParams().voxelSize*Vec3d(kf->getParams().volumeDims);
window.showWidget("cube", viz::WCube(Vec3d::all(0),
Vec3d::all(kf->getParams().volumeSize)),
volSize),
kf->getParams().volumePose);
PauseCallbackArgs pca(*kf);
window.registerMouseCallback(pauseCallback, (void*)&pca);
@ -272,6 +371,8 @@ int main(int argc, char **argv)
"Close the window or press Q to resume"), Point()));
window.spin();
window.removeWidget("text");
window.removeWidget("cloud");
window.removeWidget("normals");
window.registerMouseCallback(0);
}
@ -280,41 +381,49 @@ int main(int argc, char **argv)
else
#endif
{
Mat cvt8;
float depthFactor = kf->getParams().depthFactor;
UMat cvt8;
float depthFactor = params->depthFactor;
convertScaleAbs(frame, cvt8, 0.25*256. / depthFactor);
imshow("depth", cvt8);
if(!kf->update(frame))
if(!idle)
{
kf->reset();
std::cout << "reset" << std::endl;
}
imshow("depth", cvt8);
if(!kf->update(frame))
{
kf->reset();
std::cout << "reset" << std::endl;
}
#ifdef HAVE_OPENCV_VIZ
else
{
if(coarse)
else
{
kf->getCloud(points, normals);
if(!points.empty() && !normals.empty())
if(coarse)
{
viz::WCloud cloudWidget(points, viz::Color::white());
viz::WCloudNormals cloudNormals(points, normals, /*level*/1, /*scale*/0.05, viz::Color::gray());
window.showWidget("cloud", cloudWidget);
window.showWidget("normals", cloudNormals);
kf->getCloud(points, normals);
if(!points.empty() && !normals.empty())
{
viz::WCloud cloudWidget(points, viz::Color::white());
viz::WCloudNormals cloudNormals(points, normals, /*level*/1, /*scale*/0.05, viz::Color::gray());
window.showWidget("cloud", cloudWidget);
window.showWidget("normals", cloudNormals);
}
}
}
//window.showWidget("worldAxes", viz::WCoordinateSystem());
window.showWidget("cube", viz::WCube(Vec3d::all(0),
Vec3d::all(kf->getParams().volumeSize)),
kf->getParams().volumePose);
window.setViewerPose(kf->getPose());
window.spinOnce(1, true);
}
//window.showWidget("worldAxes", viz::WCoordinateSystem());
Vec3d volSize = kf->getParams().voxelSize*kf->getParams().volumeDims;
window.showWidget("cube", viz::WCube(Vec3d::all(0),
volSize),
kf->getParams().volumePose);
window.setViewerPose(kf->getPose());
window.spinOnce(1, true);
}
#endif
kf->render(rendered);
kf->render(rendered);
}
else
{
rendered = cvt8;
}
}
int64 newTime = getTickCount();
@ -325,17 +434,19 @@ int main(int argc, char **argv)
imshow("render", rendered);
int c = waitKey(100);
int c = waitKey(1);
switch (c)
{
case 'r':
kf->reset();
if(!idle)
kf->reset();
break;
case 'q':
return 0;
#ifdef HAVE_OPENCV_VIZ
case 'p':
pause = true;
if(!idle)
pause = true;
#endif
default:
break;

@ -12,52 +12,98 @@ using namespace std;
namespace cv {
namespace kinfu {
enum
{
UTSIZE = 27
};
ICP::ICP(const Intr _intrinsics, const std::vector<int>& _iterations, float _angleThreshold, float _distanceThreshold) :
iterations(_iterations), angleThreshold(_angleThreshold), distanceThreshold(_distanceThreshold),
intrinsics(_intrinsics)
{ }
///////// CPU implementation /////////
class ICPCPU : public ICP
class ICPImpl : public ICP
{
public:
ICPCPU(const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations, float _angleThreshold, float _distanceThreshold);
ICPImpl(const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations, float _angleThreshold, float _distanceThreshold);
virtual bool estimateTransform(cv::Affine3f& transform, cv::Ptr<Frame> oldFrame, cv::Ptr<Frame> newFrame) const override;
virtual bool estimateTransform(cv::Affine3f& transform,
InputArray oldPoints, InputArray oldNormals,
InputArray newPoints, InputArray newNormals
) const override;
virtual ~ICPCPU() { }
template < typename T >
bool estimateTransformT(cv::Affine3f& transform,
const vector<T>& oldPoints, const vector<T>& oldNormals,
const vector<T>& newPoints, const vector<T>& newNormals
) const;
private:
void getAb(const Points &oldPts, const Normals &oldNrm, const Points &newPts, const Normals &newNrm,
virtual ~ICPImpl() { }
template < typename T >
void getAb(const T& oldPts, const T& oldNrm, const T& newPts, const T& newNrm,
cv::Affine3f pose, int level, cv::Matx66f& A, cv::Vec6f& b) const;
};
private:
ICPCPU::ICPCPU(const Intr _intrinsics, const std::vector<int> &_iterations, float _angleThreshold, float _distanceThreshold) :
ICP(_intrinsics, _iterations, _angleThreshold, _distanceThreshold)
mutable vector<UMat> groupedSumBuffers;
};
ICPImpl::ICPImpl(const Intr _intrinsics, const std::vector<int> &_iterations, float _angleThreshold, float _distanceThreshold) :
ICP(_intrinsics, _iterations, _angleThreshold, _distanceThreshold),
groupedSumBuffers(_iterations.size())
{ }
bool ICPCPU::estimateTransform(cv::Affine3f& transform, cv::Ptr<Frame> _oldFrame, cv::Ptr<Frame> _newFrame) const
bool ICPImpl::estimateTransform(cv::Affine3f& transform,
InputArray _oldPoints, InputArray _oldNormals,
InputArray _newPoints, InputArray _newNormals
) const
{
ScopeTime st("icp");
CV_TRACE_FUNCTION();
cv::Ptr<FrameCPU> oldFrame = _oldFrame.dynamicCast<FrameCPU>();
cv::Ptr<FrameCPU> newFrame = _newFrame.dynamicCast<FrameCPU>();
CV_Assert(_oldPoints.size() == _oldNormals.size());
CV_Assert(_newPoints.size() == _newNormals.size());
CV_Assert(_oldPoints.size() == _newPoints.size());
const std::vector<Points>& oldPoints = oldFrame->points;
const std::vector<Normals>& oldNormals = oldFrame->normals;
const std::vector<Points>& newPoints = newFrame->points;
const std::vector<Normals>& newNormals = newFrame->normals;
#ifdef HAVE_OPENCL
if(cv::ocl::isOpenCLActivated() &&
_oldPoints.isUMatVector() && _oldNormals.isUMatVector() &&
_newPoints.isUMatVector() && _newNormals.isUMatVector())
{
std::vector<UMat> op, np, on, nn;
_oldPoints.getUMatVector(op);
_newPoints.getUMatVector(np);
_oldNormals.getUMatVector(on);
_newNormals.getUMatVector(nn);
return estimateTransformT<UMat>(transform, op, on, np, nn);
}
#endif
std::vector<Mat> op, on, np, nn;
_oldPoints.getMatVector(op);
_newPoints.getMatVector(np);
_oldNormals.getMatVector(on);
_newNormals.getMatVector(nn);
return estimateTransformT<Mat>(transform, op, on, np, nn);
}
template < typename T >
bool ICPImpl::estimateTransformT(cv::Affine3f& transform,
const vector<T>& oldPoints, const vector<T>& oldNormals,
const vector<T>& newPoints, const vector<T>& newNormals
) const
{
CV_TRACE_FUNCTION();
transform = Affine3f::Identity();
for(size_t l = 0; l < iterations.size(); l++)
{
size_t level = iterations.size() - 1 - l;
Points oldPts = oldPoints [level], newPts = newPoints [level];
Normals oldNrm = oldNormals[level], newNrm = newNormals[level];
const T& oldPts = oldPoints [level], newPts = newPoints [level];
const T& oldNrm = oldNormals[level], newNrm = newNormals[level];
for(int iter = 0; iter < iterations[level]; iter++)
{
@ -83,10 +129,13 @@ bool ICPCPU::estimateTransform(cv::Affine3f& transform, cv::Ptr<Frame> _oldFrame
return true;
}
///////// CPU implementation /////////
// 1 any coord to check is enough since we know the generation
#if CV_SIMD128
#if USE_INTRINSICS
static inline bool fastCheck(const v_float32x4& p0, const v_float32x4& p1)
{
float check = (p0.get0() + p1.get0());
@ -125,11 +174,6 @@ typedef Matx<float, 6, 7> ABtype;
struct GetAbInvoker : ParallelLoopBody
{
enum
{
UTSIZE = 27
};
GetAbInvoker(ABtype& _globalAb, Mutex& _mtx,
const Points& _oldPts, const Normals& _oldNrm, const Points& _newPts, const Normals& _newNrm,
Affine3f _pose, Intr::Projector _proj, float _sqDistanceThresh, float _minCos) :
@ -141,7 +185,7 @@ struct GetAbInvoker : ParallelLoopBody
virtual void operator ()(const Range& range) const override
{
#if CV_SIMD128
#if USE_INTRINSICS
CV_Assert(ptype::channels == 4);
const size_t utBufferSize = 9;
@ -447,18 +491,20 @@ struct GetAbInvoker : ParallelLoopBody
};
void ICPCPU::getAb(const Points& oldPts, const Normals& oldNrm, const Points& newPts, const Normals& newNrm,
Affine3f pose, int level, Matx66f &A, Vec6f &b) const
template <>
void ICPImpl::getAb<Mat>(const Mat& oldPts, const Mat& oldNrm, const Mat& newPts, const Mat& newNrm,
cv::Affine3f pose, int level, cv::Matx66f& A, cv::Vec6f& b) const
{
ScopeTime st("icp: get ab", false);
CV_TRACE_FUNCTION();
CV_Assert(oldPts.size() == oldNrm.size());
CV_Assert(newPts.size() == newNrm.size());
ABtype sumAB = ABtype::zeros();
Mutex mutex;
GetAbInvoker invoker(sumAB, mutex, oldPts, oldNrm, newPts, newNrm, pose,
const Points op(oldPts), on(oldNrm);
const Normals np(newPts), nn(newNrm);
GetAbInvoker invoker(sumAB, mutex, op, on, np, nn, pose,
intrinsics.scale(level).makeProjector(),
distanceThreshold*distanceThreshold, cos(angleThreshold));
Range range(0, newPts.rows);
@ -480,39 +526,133 @@ void ICPCPU::getAb(const Points& oldPts, const Normals& oldNrm, const Points& ne
///////// GPU implementation /////////
class ICPGPU : public ICP
#ifdef HAVE_OPENCL
template <>
void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& newPts, const UMat& newNrm,
Affine3f pose, int level, Matx66f &A, Vec6f &b) const
{
public:
ICPGPU(const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations, float _angleThreshold, float _distanceThreshold);
CV_TRACE_FUNCTION();
Size oldSize = oldPts.size(), newSize = newPts.size();
CV_Assert(oldSize == oldNrm.size());
CV_Assert(newSize == newNrm.size());
// calculate 1x7 vector ab to produce b and upper triangle of A:
// [A|b] = ab*(ab^t)
// and then reduce it across work groups
cv::String errorStr;
ocl::ProgramSource source = ocl::rgbd::icp_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
ocl::Kernel k;
k.create("getAb", source, options, &errorStr);
if(k.empty())
throw std::runtime_error("Failed to create kernel: " + errorStr);
size_t globalSize[2];
globalSize[0] = (size_t)newPts.cols;
globalSize[1] = (size_t)newPts.rows;
const ocl::Device& device = ocl::Device::getDefault();
// workaround for Intel's integrated GPU
size_t wgsLimit = device.isIntel() ? 64 : device.maxWorkGroupSize();
size_t memSize = device.localMemSize();
// local memory should keep upperTriangles for all threads in group for reduce
const size_t ltsz = UTSIZE*sizeof(float);
const size_t lcols = 32;
size_t lrows = min(memSize/ltsz, wgsLimit)/lcols;
// round lrows down to 2^n
lrows = roundDownPow2(lrows);
size_t localSize[2] = {lcols, lrows};
Size ngroups((int)divUp(globalSize[0], (unsigned int)localSize[0]),
(int)divUp(globalSize[1], (unsigned int)localSize[1]));
// size of local buffer for group-wide reduce
size_t lsz = localSize[0]*localSize[1]*ltsz;
Intr::Projector proj = intrinsics.scale(level).makeProjector();
Vec2f fxy(proj.fx, proj.fy), cxy(proj.cx, proj.cy);
UMat& groupedSumGpu = groupedSumBuffers[level];
groupedSumGpu.create(Size(ngroups.width*UTSIZE, ngroups.height),
CV_32F);
groupedSumGpu.setTo(0);
// TODO: optimization possible:
// samplers instead of oldPts/oldNrm (mask needed)
k.args(ocl::KernelArg::ReadOnlyNoSize(oldPts),
ocl::KernelArg::ReadOnlyNoSize(oldNrm),
oldSize,
ocl::KernelArg::ReadOnlyNoSize(newPts),
ocl::KernelArg::ReadOnlyNoSize(newNrm),
newSize,
ocl::KernelArg::Constant(pose.matrix.val,
sizeof(pose.matrix.val)),
fxy.val, cxy.val,
distanceThreshold*distanceThreshold,
cos(angleThreshold),
//TODO: replace by KernelArg::Local(lsz)
ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
ocl::KernelArg::WriteOnlyNoSize(groupedSumGpu)
);
if(!k.run(2, globalSize, localSize, true))
throw std::runtime_error("Failed to run kernel");
float upperTriangle[UTSIZE];
for(int i = 0; i < UTSIZE; i++)
upperTriangle[i] = 0;
Mat groupedSumCpu = groupedSumGpu.getMat(ACCESS_READ);
for(int y = 0; y < ngroups.height; y++)
{
const float* rowr = groupedSumCpu.ptr<float>(y);
for(int x = 0; x < ngroups.width; x++)
{
const float* p = rowr + x*UTSIZE;
for(int j = 0; j < UTSIZE; j++)
{
upperTriangle[j] += p[j];
}
}
}
groupedSumCpu.release();
virtual bool estimateTransform(cv::Affine3f& transform, cv::Ptr<Frame> oldFrame, cv::Ptr<Frame> newFrame) const override;
ABtype sumAB = ABtype::zeros();
int pos = 0;
for(int i = 0; i < 6; i++)
{
for(int j = i; j < 7; j++)
{
sumAB(i, j) = upperTriangle[pos++];
}
}
virtual ~ICPGPU() { }
};
// splitting AB matrix to A and b
for(int i = 0; i < 6; i++)
{
// augment lower triangle of A by symmetry
for(int j = i; j < 6; j++)
{
A(i, j) = A(j, i) = sumAB(i, j);
}
ICPGPU::ICPGPU(const Intr _intrinsics, const std::vector<int> &_iterations, float _angleThreshold, float _distanceThreshold) :
ICP(_intrinsics, _iterations, _angleThreshold, _distanceThreshold)
{ }
b(i) = sumAB(i, 6);
}
}
#endif
bool ICPGPU::estimateTransform(cv::Affine3f& /*transform*/, cv::Ptr<Frame> /*_oldFrame*/, cv::Ptr<Frame> /*newFrame*/) const
{
throw std::runtime_error("Not implemented");
}
///
cv::Ptr<ICP> makeICP(cv::kinfu::Params::PlatformType t,
const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations,
cv::Ptr<ICP> makeICP(const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations,
float _angleThreshold, float _distanceThreshold)
{
switch (t)
{
case cv::kinfu::Params::PlatformType::PLATFORM_CPU:
return cv::makePtr<ICPCPU>(_intrinsics, _iterations, _angleThreshold, _distanceThreshold);
case cv::kinfu::Params::PlatformType::PLATFORM_GPU:
return cv::makePtr<ICPGPU>(_intrinsics, _iterations, _angleThreshold, _distanceThreshold);
default:
return cv::Ptr<ICP>();
}
return makePtr<ICPImpl>(_intrinsics, _iterations, _angleThreshold, _distanceThreshold);
}
} // namespace kinfu

@ -18,7 +18,10 @@ class ICP
public:
ICP(const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations, float _angleThreshold, float _distanceThreshold);
virtual bool estimateTransform(cv::Affine3f& transform, cv::Ptr<Frame> oldFrame, cv::Ptr<Frame> newFrame) const = 0;
virtual bool estimateTransform(cv::Affine3f& transform,
InputArray oldPoints, InputArray oldNormals,
InputArray newPoints, InputArray newNormals
) const = 0;
virtual ~ICP() { }
@ -30,8 +33,7 @@ protected:
cv::kinfu::Intr intrinsics;
};
cv::Ptr<ICP> makeICP(cv::kinfu::Params::PlatformType t,
const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations,
cv::Ptr<ICP> makeICP(const cv::kinfu::Intr _intrinsics, const std::vector<int> &_iterations,
float _angleThreshold, float _distanceThreshold);
} // namespace kinfu

@ -12,45 +12,10 @@
namespace cv {
namespace kinfu {
class KinFu::KinFuImpl : public KinFu
{
public:
KinFuImpl(const Params& _params);
virtual ~KinFuImpl();
const Params& getParams() const CV_OVERRIDE;
void setParams(const Params&) CV_OVERRIDE;
void render(OutputArray image, const Matx44f& cameraPose) const CV_OVERRIDE;
void getCloud(OutputArray points, OutputArray normals) const CV_OVERRIDE;
void getPoints(OutputArray points) const CV_OVERRIDE;
void getNormals(InputArray points, OutputArray normals) const CV_OVERRIDE;
void reset() CV_OVERRIDE;
const Affine3f getPose() const CV_OVERRIDE;
bool update(InputArray depth) CV_OVERRIDE;
private:
Params params;
cv::Ptr<FrameGenerator> frameGenerator;
cv::Ptr<ICP> icp;
cv::Ptr<TSDFVolume> volume;
int frameCounter;
Affine3f pose;
cv::Ptr<Frame> frame;
};
Ptr<Params> Params::defaultParams()
{
Params p;
p.platform = PLATFORM_CPU;
p.frameSize = Size(640, 480);
float fx, fy, cx, cy;
@ -88,12 +53,13 @@ Ptr<Params> Params::defaultParams()
p.tsdf_min_camera_movement = 0.f; //meters, disabled
p.volumeDims = 512; //number of voxels
p.volumeDims = Vec3i::all(512); //number of voxels
p.volumeSize = 3.f; //meters
float volSize = 3.f;
p.voxelSize = volSize/512.f; //meters
// default pose of volume cube
p.volumePose = Affine3f().translate(Vec3f(-p.volumeSize/2, -p.volumeSize/2, 0.5f));
p.volumePose = Affine3f().translate(Vec3f(-volSize/2.f, -volSize/2.f, 0.5f));
p.tsdf_trunc_dist = 0.04f; //meters;
p.tsdf_max_weight = 64; //frames
@ -129,78 +95,156 @@ Ptr<Params> Params::coarseParams()
}
p->pyramidLevels = (int)p->icpIterations.size();
p->volumeDims = 128; //number of voxels
float volSize = 3.f;
p->volumeDims = Vec3i::all(128); //number of voxels
p->voxelSize = volSize/128.f;
p->raycast_step_factor = 0.75f; //in voxel sizes
return p;
}
KinFu::KinFuImpl::KinFuImpl(const Params &_params) :
// T should be Mat or UMat
template< typename T >
class KinFuImpl : public KinFu
{
public:
KinFuImpl(const Params& _params);
virtual ~KinFuImpl();
const Params& getParams() const CV_OVERRIDE;
void render(OutputArray image, const Matx44f& cameraPose) const CV_OVERRIDE;
void getCloud(OutputArray points, OutputArray normals) const CV_OVERRIDE;
void getPoints(OutputArray points) const CV_OVERRIDE;
void getNormals(InputArray points, OutputArray normals) const CV_OVERRIDE;
void reset() CV_OVERRIDE;
const Affine3f getPose() const CV_OVERRIDE;
bool update(InputArray depth) CV_OVERRIDE;
bool updateT(const T& depth);
private:
Params params;
cv::Ptr<ICP> icp;
cv::Ptr<TSDFVolume> volume;
int frameCounter;
Affine3f pose;
std::vector<T> pyrPoints;
std::vector<T> pyrNormals;
};
template< typename T >
KinFuImpl<T>::KinFuImpl(const Params &_params) :
params(_params),
frameGenerator(makeFrameGenerator(params.platform)),
icp(makeICP(params.platform, params.intr, params.icpIterations, params.icpAngleThresh, params.icpDistThresh)),
volume(makeTSDFVolume(params.platform, params.volumeDims, params.volumeSize, params.volumePose,
icp(makeICP(params.intr, params.icpIterations, params.icpAngleThresh, params.icpDistThresh)),
volume(makeTSDFVolume(params.volumeDims, params.voxelSize, params.volumePose,
params.tsdf_trunc_dist, params.tsdf_max_weight,
params.raycast_step_factor)),
frame()
pyrPoints(), pyrNormals()
{
reset();
}
void KinFu::KinFuImpl::reset()
template< typename T >
void KinFuImpl<T>::reset()
{
frameCounter = 0;
pose = Affine3f::Identity();
volume->reset();
}
KinFu::KinFuImpl::~KinFuImpl()
{
template< typename T >
KinFuImpl<T>::~KinFuImpl()
{ }
template< typename T >
const Params& KinFuImpl<T>::getParams() const
{
return params;
}
const Params& KinFu::KinFuImpl::getParams() const
template< typename T >
const Affine3f KinFuImpl<T>::getPose() const
{
return params;
return pose;
}
void KinFu::KinFuImpl::setParams(const Params& p)
template<>
bool KinFuImpl<Mat>::update(InputArray _depth)
{
params = p;
CV_Assert(!_depth.empty() && _depth.size() == params.frameSize);
Mat depth;
if(_depth.isUMat())
{
_depth.copyTo(depth);
return updateT(depth);
}
else
{
return updateT(_depth.getMat());
}
}
const Affine3f KinFu::KinFuImpl::getPose() const
template<>
bool KinFuImpl<UMat>::update(InputArray _depth)
{
return pose;
CV_Assert(!_depth.empty() && _depth.size() == params.frameSize);
UMat depth;
if(!_depth.isUMat())
{
_depth.copyTo(depth);
return updateT(depth);
}
else
{
return updateT(_depth.getUMat());
}
}
bool KinFu::KinFuImpl::update(InputArray _depth)
template< typename T >
bool KinFuImpl<T>::updateT(const T& _depth)
{
ScopeTime st("kinfu update");
CV_TRACE_FUNCTION();
CV_Assert(!_depth.empty() && _depth.size() == params.frameSize);
T depth;
if(_depth.type() != DEPTH_TYPE)
_depth.convertTo(depth, DEPTH_TYPE);
else
depth = _depth;
cv::Ptr<Frame> newFrame = (*frameGenerator)();
(*frameGenerator)(newFrame, _depth,
params.intr,
params.pyramidLevels,
params.depthFactor,
params.bilateral_sigma_depth,
params.bilateral_sigma_spatial,
params.bilateral_kernel_size);
std::vector<T> newPoints, newNormals;
makeFrameFromDepth(depth, newPoints, newNormals, params.intr,
params.pyramidLevels,
params.depthFactor,
params.bilateral_sigma_depth,
params.bilateral_sigma_spatial,
params.bilateral_kernel_size);
if(frameCounter == 0)
{
// use depth instead of distance
volume->integrate(newFrame, params.depthFactor, pose, params.intr);
volume->integrate(depth, params.depthFactor, pose, params.intr);
frame = newFrame;
pyrPoints = newPoints;
pyrNormals = newNormals;
}
else
{
Affine3f affine;
bool success = icp->estimateTransform(affine, frame, newFrame);
bool success = icp->estimateTransform(affine, pyrPoints, pyrNormals, newPoints, newNormals);
if(!success)
return false;
@ -212,12 +256,15 @@ bool KinFu::KinFuImpl::update(InputArray _depth)
if((rnorm + tnorm)/2 >= params.tsdf_min_camera_movement)
{
// use depth instead of distance
volume->integrate(newFrame, params.depthFactor, pose, params.intr);
volume->integrate(depth, params.depthFactor, pose, params.intr);
}
// raycast and build a pyramid of points and normals
volume->raycast(pose, params.intr, params.frameSize,
params.pyramidLevels, frameGenerator, frame);
T& points = pyrPoints [0];
T& normals = pyrNormals[0];
volume->raycast(pose, params.intr, params.frameSize, points, normals);
// build a pyramid of points and normals
buildPyramidPointsNormals(points, normals, pyrPoints, pyrNormals,
params.pyramidLevels);
}
frameCounter++;
@ -225,9 +272,10 @@ bool KinFu::KinFuImpl::update(InputArray _depth)
}
void KinFu::KinFuImpl::render(OutputArray image, const Matx44f& _cameraPose) const
template< typename T >
void KinFuImpl<T>::render(OutputArray image, const Matx44f& _cameraPose) const
{
ScopeTime st("kinfu render");
CV_TRACE_FUNCTION();
Affine3f cameraPose(_cameraPose);
@ -235,40 +283,58 @@ void KinFu::KinFuImpl::render(OutputArray image, const Matx44f& _cameraPose) con
if((cameraPose.rotation() == pose.rotation() && cameraPose.translation() == pose.translation()) ||
(cameraPose.rotation() == id.rotation() && cameraPose.translation() == id.translation()))
{
frame->render(image, 0, params.lightPose);
renderPointsNormals(pyrPoints[0], pyrNormals[0], image, params.lightPose);
}
else
{
// raycast and build a pyramid of points and normals
cv::Ptr<Frame> f = (*frameGenerator)();
volume->raycast(cameraPose, params.intr, params.frameSize,
params.pyramidLevels, frameGenerator, f);
f->render(image, 0, params.lightPose);
T points, normals;
volume->raycast(cameraPose, params.intr, params.frameSize, points, normals);
renderPointsNormals(points, normals, image, params.lightPose);
}
}
void KinFu::KinFuImpl::getCloud(OutputArray p, OutputArray n) const
template< typename T >
void KinFuImpl<T>::getCloud(OutputArray p, OutputArray n) const
{
volume->fetchPointsNormals(p, n);
}
void KinFu::KinFuImpl::getPoints(OutputArray points) const
template< typename T >
void KinFuImpl<T>::getPoints(OutputArray points) const
{
volume->fetchPointsNormals(points, noArray());
}
void KinFu::KinFuImpl::getNormals(InputArray points, OutputArray normals) const
template< typename T >
void KinFuImpl<T>::getNormals(InputArray points, OutputArray normals) const
{
volume->fetchNormals(points, normals);
}
// importing class
Ptr<KinFu> KinFu::create(const Ptr<Params>& _params)
#ifdef OPENCV_ENABLE_NONFREE
Ptr<KinFu> KinFu::create(const Ptr<Params>& params)
{
#ifdef HAVE_OPENCL
if(cv::ocl::isOpenCLActivated())
return makePtr< KinFuImpl<UMat> >(*params);
#endif
return makePtr< KinFuImpl<Mat> >(*params);
}
#else
Ptr<KinFu> KinFu::create(const Ptr<Params>& /*params*/)
{
return makePtr<KinFu::KinFuImpl>(*_params);
CV_Error(Error::StsNotImplemented,
"This algorithm is patented and is excluded in this configuration; "
"Set OPENCV_ENABLE_NONFREE CMake option and rebuild the library");
}
#endif
KinFu::~KinFu() {}

@ -10,98 +10,9 @@
namespace cv {
namespace kinfu {
struct FrameGeneratorCPU : FrameGenerator
{
public:
virtual cv::Ptr<Frame> operator ()() const override;
virtual void operator() (Ptr<Frame> _frame, InputArray depth, const kinfu::Intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize) const override;
virtual void operator() (Ptr<Frame> _frame, InputArray points, InputArray normals, int levels) const override;
virtual ~FrameGeneratorCPU() {}
};
void computePointsNormals(const cv::kinfu::Intr, float depthFactor, const Depth, Points, Normals );
Depth pyrDownBilateral(const Depth depth, float sigma);
void pyrDownPointsNormals(const Points p, const Normals n, Points& pdown, Normals& ndown);
cv::Ptr<Frame> FrameGeneratorCPU::operator ()() const
{
return makePtr<FrameCPU>();
}
void FrameGeneratorCPU::operator ()(Ptr<Frame> _frame, InputArray depth, const Intr intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize) const
{
ScopeTime st("frameGenerator: from depth");
Ptr<FrameCPU> frame = _frame.dynamicCast<FrameCPU>();
CV_Assert(frame);
//CV_Assert(depth.type() == CV_16S);
// this should convert CV_16S to CV_32F
frame->depthData = Depth(depth.getMat());
// looks like OpenCV's bilateral filter works the same as KinFu's
Depth smooth;
bilateralFilter(frame->depthData, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial);
// depth truncation is not used by default
//if (p.icp_truncate_depth_dist > 0) kfusion::cuda::depthTruncation(curr_.depth_pyr[0], p.icp_truncate_depth_dist);
// we don't need depth pyramid outside this method
// if we do, the code is to be refactored
Depth scaled = smooth;
Size sz = smooth.size();
frame->points.resize(levels);
frame->normals.resize(levels);
for(int i = 0; i < levels; i++)
{
Points p = frame->points[i];
Normals n = frame->normals[i];
p.create(sz); n.create(sz);
computePointsNormals(intr.scale(i), depthFactor, scaled, p, n);
frame->points[i] = p;
frame->normals[i] = n;
if(i < levels - 1)
{
sz.width /= 2; sz.height /= 2;
scaled = pyrDownBilateral(scaled, sigmaDepth*depthFactor);
}
}
}
void FrameGeneratorCPU::operator ()(Ptr<Frame> _frame, InputArray _points, InputArray _normals, int levels) const
{
ScopeTime st("frameGenerator: pyrDown p, n");
CV_Assert( _points.type() == DataType<Points::value_type>::type);
CV_Assert(_normals.type() == DataType<Points::value_type>::type);
Ptr<FrameCPU> frame = _frame.dynamicCast<FrameCPU>();
CV_Assert(frame);
frame->depthData = Depth();
frame->points.resize(levels);
frame->normals.resize(levels);
frame->points[0] = _points.getMat();
frame->normals[0] = _normals.getMat();
Size sz = _points.size();
for(int i = 1; i < levels; i++)
{
sz.width /= 2; sz.height /= 2;
frame->points[i].create(sz);
frame->normals[i].create(sz);
pyrDownPointsNormals(frame->points[i-1], frame->normals[i-1],
frame->points[i ], frame->normals[i ]);
}
}
static void computePointsNormals(const cv::kinfu::Intr, float depthFactor, const Depth, Points, Normals );
static Depth pyrDownBilateral(const Depth depth, float sigma);
static void pyrDownPointsNormals(const Points p, const Normals n, Points& pdown, Normals& ndown);
template<int p>
inline float specPow(float x)
@ -132,7 +43,7 @@ inline float specPow<1>(float x)
struct RenderInvoker : ParallelLoopBody
{
RenderInvoker(const Points& _points, const Normals& _normals, Mat_<Vec3b>& _img, Affine3f _lightPose, Size _sz) :
RenderInvoker(const Points& _points, const Normals& _normals, Mat_<Vec4b>& _img, Affine3f _lightPose, Size _sz) :
ParallelLoopBody(),
points(_points),
normals(_normals),
@ -145,7 +56,7 @@ struct RenderInvoker : ParallelLoopBody
{
for(int y = range.start; y < range.end; y++)
{
Vec3b* imgRow = img[y];
Vec4b* imgRow = img[y];
const ptype* ptsRow = points[y];
const ptype* nrmRow = normals[y];
@ -154,11 +65,11 @@ struct RenderInvoker : ParallelLoopBody
Point3f p = fromPtype(ptsRow[x]);
Point3f n = fromPtype(nrmRow[x]);
Vec3b color;
Vec4b color;
if(isNaN(p))
{
color = Vec3b(0, 32, 0);
color = Vec4b(0, 32, 0, 0);
}
else
{
@ -178,7 +89,7 @@ struct RenderInvoker : ParallelLoopBody
uchar ix = (uchar)((Ax*Ka*Dx + Lx*Kd*Dx*max(0.f, n.dot(l)) +
Lx*Ks*Sx*specPow<sp>(max(0.f, r.dot(v))))*255.f);
color = Vec3b(ix, ix, ix);
color = Vec4b(ix, ix, ix, 0);
}
imgRow[x] = color;
@ -188,38 +99,16 @@ struct RenderInvoker : ParallelLoopBody
const Points& points;
const Normals& normals;
Mat_<Vec3b>& img;
Mat_<Vec4b>& img;
Affine3f lightPose;
Size sz;
};
void FrameCPU::render(OutputArray image, int level, Affine3f lightPose) const
{
ScopeTime st("frame render");
CV_Assert(level < (int)points.size());
CV_Assert(level < (int)normals.size());
Size sz = points[level].size();
image.create(sz, CV_8UC3);
Mat_<Vec3b> img = image.getMat();
RenderInvoker ri(points[level], normals[level], img, lightPose, sz);
Range range(0, sz.height);
const int nstripes = -1;
parallel_for_(range, ri, nstripes);
}
void FrameCPU::getDepth(OutputArray _depth) const
{
CV_Assert(!depthData.empty());
_depth.assign(depthData);
}
void pyrDownPointsNormals(const Points p, const Normals n, Points &pdown, Normals &ndown)
{
CV_TRACE_FUNCTION();
for(int y = 0; y < pdown.rows; y++)
{
ptype* ptsRow = pdown[y];
@ -310,6 +199,8 @@ struct PyrDownBilateralInvoker : ParallelLoopBody
Depth pyrDownBilateral(const Depth depth, float sigma)
{
CV_TRACE_FUNCTION();
Depth depthDown(depth.rows/2, depth.cols/2);
PyrDownBilateralInvoker pdi(depth, depthDown, sigma);
@ -386,6 +277,8 @@ struct ComputePointsNormalsInvoker : ParallelLoopBody
void computePointsNormals(const Intr intr, float depthFactor, const Depth depth,
Points points, Normals normals)
{
CV_TRACE_FUNCTION();
CV_Assert(!points.empty() && !normals.empty());
CV_Assert(depth.size() == points.size());
CV_Assert(depth.size() == normals.size());
@ -405,53 +298,399 @@ void computePointsNormals(const Intr intr, float depthFactor, const Depth depth,
///////// GPU implementation /////////
struct FrameGeneratorGPU : FrameGenerator
#ifdef HAVE_OPENCL
static bool ocl_renderPointsNormals(const UMat points, const UMat normals, UMat image, Affine3f lightPose);
static bool ocl_makeFrameFromDepth(const UMat depth, OutputArrayOfArrays points, OutputArrayOfArrays normals,
const Intr intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize);
static bool ocl_buildPyramidPointsNormals(const UMat points, const UMat normals,
OutputArrayOfArrays pyrPoints, OutputArrayOfArrays pyrNormals,
int levels);
static bool computePointsNormalsGpu(const Intr intr, float depthFactor, const UMat& depth, UMat& points, UMat& normals);
static bool pyrDownBilateralGpu(const UMat& depth, UMat& depthDown, float sigma);
static bool customBilateralFilterGpu(const UMat src, UMat& dst, int kernelSize, float sigmaDepth, float sigmaSpatial);
static bool pyrDownPointsNormalsGpu(const UMat p, const UMat n, UMat &pdown, UMat &ndown);
bool computePointsNormalsGpu(const Intr intr, float depthFactor, const UMat& depth,
UMat& points, UMat& normals)
{
public:
virtual cv::Ptr<Frame> operator ()() const override;
virtual void operator() (Ptr<Frame> frame, InputArray depth, const kinfu::Intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize) const override;
virtual void operator() (Ptr<Frame> frame, InputArray points, InputArray normals, int levels) const override;
CV_TRACE_FUNCTION();
virtual ~FrameGeneratorGPU() {}
};
CV_Assert(!points.empty() && !normals.empty());
CV_Assert(depth.size() == points.size());
CV_Assert(depth.size() == normals.size());
CV_Assert(depth.type() == DEPTH_TYPE);
CV_Assert(points.type() == POINT_TYPE);
CV_Assert(normals.type() == POINT_TYPE);
// conversion to meters
float dfac = 1.f/depthFactor;
Intr::Reprojector reproj = intr.makeReprojector();
cv::String errorStr;
cv::String name = "computePointsNormals";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr);
if(k.empty())
return false;
Vec2f fxyinv(reproj.fxinv, reproj.fyinv), cxy(reproj.cx, reproj.cy);
k.args(ocl::KernelArg::WriteOnlyNoSize(points),
ocl::KernelArg::WriteOnlyNoSize(normals),
ocl::KernelArg::ReadOnly(depth),
fxyinv.val,
cxy.val,
dfac);
size_t globalSize[2];
globalSize[0] = (size_t)depth.cols;
globalSize[1] = (size_t)depth.rows;
return k.run(2, globalSize, NULL, true);
}
bool pyrDownBilateralGpu(const UMat& depth, UMat& depthDown, float sigma)
{
CV_TRACE_FUNCTION();
depthDown.create(depth.rows/2, depth.cols/2, DEPTH_TYPE);
cv::String errorStr;
cv::String name = "pyrDownBilateral";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr);
if(k.empty())
return false;
k.args(ocl::KernelArg::ReadOnly(depth),
ocl::KernelArg::WriteOnly(depthDown),
sigma);
size_t globalSize[2];
globalSize[0] = (size_t)depthDown.cols;
globalSize[1] = (size_t)depthDown.rows;
return k.run(2, globalSize, NULL, true);
}
//TODO: remove it when OpenCV's bilateral processes 32f on GPU
bool customBilateralFilterGpu(const UMat src /* udepth */, UMat& dst /* smooth */,
int kernelSize, float sigmaDepth, float sigmaSpatial)
{
CV_TRACE_FUNCTION();
Size frameSize = src.size();
CV_Assert(frameSize.area() > 0);
CV_Assert(src.type() == DEPTH_TYPE);
dst.create(frameSize, DEPTH_TYPE);
cv::String errorStr;
cv::String name = "customBilateral";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr);
cv::Ptr<Frame> FrameGeneratorGPU::operator ()() const
if(k.empty())
return false;
k.args(ocl::KernelArg::ReadOnlyNoSize(src),
ocl::KernelArg::WriteOnlyNoSize(dst),
frameSize,
kernelSize,
0.5f / (sigmaSpatial * sigmaSpatial),
0.5f / (sigmaDepth * sigmaDepth));
size_t globalSize[2];
globalSize[0] = (size_t)src.cols;
globalSize[1] = (size_t)src.rows;
return k.run(2, globalSize, NULL, true);
}
bool pyrDownPointsNormalsGpu(const UMat p, const UMat n, UMat &pdown, UMat &ndown)
{
return makePtr<FrameGPU>();
CV_TRACE_FUNCTION();
cv::String errorStr;
cv::String name = "pyrDownPointsNormals";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr);
if(k.empty())
return false;
Size downSize = pdown.size();
k.args(ocl::KernelArg::ReadOnlyNoSize(p),
ocl::KernelArg::ReadOnlyNoSize(n),
ocl::KernelArg::WriteOnlyNoSize(pdown),
ocl::KernelArg::WriteOnlyNoSize(ndown),
downSize);
size_t globalSize[2];
globalSize[0] = (size_t)pdown.cols;
globalSize[1] = (size_t)pdown.rows;
return k.run(2, globalSize, NULL, true);
}
void FrameGeneratorGPU::operator ()(Ptr<Frame> /*frame*/, InputArray /*depth*/, const Intr /*intr*/, int /*levels*/, float /*depthFactor*/,
float /*sigmaDepth*/, float /*sigmaSpatial*/, int /*kernelSize*/) const
static bool ocl_renderPointsNormals(const UMat points, const UMat normals,
UMat img, Affine3f lightPose)
{
throw std::runtime_error("Not implemented");
CV_TRACE_FUNCTION();
cv::String errorStr;
cv::String name = "render";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr);
if(k.empty())
return false;
Vec4f lightPt(lightPose.translation()[0],
lightPose.translation()[1],
lightPose.translation()[2]);
Size frameSize = points.size();
k.args(ocl::KernelArg::ReadOnlyNoSize(points),
ocl::KernelArg::ReadOnlyNoSize(normals),
ocl::KernelArg::WriteOnlyNoSize(img),
frameSize,
lightPt.val);
size_t globalSize[2];
globalSize[0] = (size_t)points.cols;
globalSize[1] = (size_t)points.rows;
return k.run(2, globalSize, NULL, true);
}
void FrameGeneratorGPU::operator ()(Ptr<Frame> /*frame*/, InputArray /*_points*/, InputArray /*_normals*/, int /*levels*/) const
static bool ocl_makeFrameFromDepth(const UMat depth, OutputArrayOfArrays points, OutputArrayOfArrays normals,
const Intr intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize)
{
throw std::runtime_error("Not implemented");
CV_TRACE_FUNCTION();
// looks like OpenCV's bilateral filter works the same as KinFu's
UMat smooth;
//TODO: fix that
// until 32f isn't implemented in OpenCV, we should use our workarounds
//bilateralFilter(udepth, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial);
if(!customBilateralFilterGpu(depth, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial))
return false;
// depth truncation is not used by default
//if (p.icp_truncate_depth_dist > 0) kfusion::cuda::depthTruncation(curr_.depth_pyr[0], p.icp_truncate_depth_dist);
UMat scaled = smooth;
Size sz = smooth.size();
points.create(levels, 1, POINT_TYPE);
normals.create(levels, 1, POINT_TYPE);
for(int i = 0; i < levels; i++)
{
UMat& p = points.getUMatRef(i);
UMat& n = normals.getUMatRef(i);
p.create(sz, POINT_TYPE);
n.create(sz, POINT_TYPE);
if(!computePointsNormalsGpu(intr.scale(i), depthFactor, scaled, p, n))
return false;
if(i < levels - 1)
{
sz.width /= 2, sz.height /= 2;
UMat halfDepth(sz, DEPTH_TYPE);
pyrDownBilateralGpu(scaled, halfDepth, sigmaDepth*depthFactor);
scaled = halfDepth;
}
}
return true;
}
void FrameGPU::render(OutputArray /* image */, int /*level*/, Affine3f /*lightPose*/) const
static bool ocl_buildPyramidPointsNormals(const UMat points, const UMat normals,
OutputArrayOfArrays pyrPoints, OutputArrayOfArrays pyrNormals,
int levels)
{
throw std::runtime_error("Not implemented");
CV_TRACE_FUNCTION();
pyrPoints .create(levels, 1, POINT_TYPE);
pyrNormals.create(levels, 1, POINT_TYPE);
pyrPoints .getUMatRef(0) = points;
pyrNormals.getUMatRef(0) = normals;
Size sz = points.size();
for(int i = 1; i < levels; i++)
{
UMat p1 = pyrPoints .getUMat(i-1);
UMat n1 = pyrNormals.getUMat(i-1);
sz.width /= 2; sz.height /= 2;
UMat& p0 = pyrPoints .getUMatRef(i);
UMat& n0 = pyrNormals.getUMatRef(i);
p0.create(sz, POINT_TYPE);
n0.create(sz, POINT_TYPE);
if(!pyrDownPointsNormalsGpu(p1, n1, p0, n0))
return false;
}
return true;
}
void FrameGPU::getDepth(OutputArray /* depth */) const
#endif
void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray image, Affine3f lightPose)
{
throw std::runtime_error("Not implemented");
CV_TRACE_FUNCTION();
CV_Assert(_points.size().area() > 0);
CV_Assert(_points.size() == _normals.size());
Size sz = _points.size();
image.create(sz, CV_8UC4);
CV_OCL_RUN(_points.isUMat() && _normals.isUMat() && image.isUMat(),
ocl_renderPointsNormals(_points.getUMat(),
_normals.getUMat(),
image.getUMat(), lightPose))
Points points = _points.getMat();
Normals normals = _normals.getMat();
Mat_<Vec4b> img = image.getMat();
RenderInvoker ri(points, normals, img, lightPose, sz);
Range range(0, sz.height);
const int nstripes = -1;
parallel_for_(range, ri, nstripes);
}
void makeFrameFromDepth(InputArray _depth,
OutputArray pyrPoints, OutputArray pyrNormals,
const Intr intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize)
{
CV_TRACE_FUNCTION();
CV_Assert(_depth.type() == DEPTH_TYPE);
CV_OCL_RUN(_depth.isUMat() && pyrPoints.isUMatVector() && pyrNormals.isUMatVector(),
ocl_makeFrameFromDepth(_depth.getUMat(), pyrPoints, pyrNormals,
intr, levels, depthFactor,
sigmaDepth, sigmaSpatial, kernelSize));
int kp = pyrPoints.kind(), kn = pyrNormals.kind();
CV_Assert(kp == _InputArray::STD_ARRAY_MAT || kp == _InputArray::STD_VECTOR_MAT);
CV_Assert(kn == _InputArray::STD_ARRAY_MAT || kn == _InputArray::STD_VECTOR_MAT);
Depth depth = _depth.getMat();
// looks like OpenCV's bilateral filter works the same as KinFu's
Depth smooth;
//TODO: remove it when OpenCV's bilateral works properly
patchNaNs(depth);
bilateralFilter(depth, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial);
// depth truncation is not used by default
//if (p.icp_truncate_depth_dist > 0) kfusion::cuda::depthTruncation(curr_.depth_pyr[0], p.icp_truncate_depth_dist);
// we don't need depth pyramid outside this method
// if we do, the code is to be refactored
Depth scaled = smooth;
Size sz = smooth.size();
pyrPoints.create(levels, 1, POINT_TYPE);
pyrNormals.create(levels, 1, POINT_TYPE);
for(int i = 0; i < levels; i++)
{
pyrPoints .create(sz, POINT_TYPE, i);
pyrNormals.create(sz, POINT_TYPE, i);
Points p = pyrPoints. getMatRef(i);
Normals n = pyrNormals.getMatRef(i);
computePointsNormals(intr.scale(i), depthFactor, scaled, p, n);
if(i < levels - 1)
{
sz.width /= 2; sz.height /= 2;
scaled = pyrDownBilateral(scaled, sigmaDepth*depthFactor);
}
}
}
cv::Ptr<FrameGenerator> makeFrameGenerator(cv::kinfu::Params::PlatformType t)
void buildPyramidPointsNormals(InputArray _points, InputArray _normals,
OutputArrayOfArrays pyrPoints, OutputArrayOfArrays pyrNormals,
int levels)
{
switch (t)
CV_TRACE_FUNCTION();
CV_Assert(_points.type() == POINT_TYPE);
CV_Assert(_points.type() == _normals.type());
CV_Assert(_points.size() == _normals.size());
CV_OCL_RUN(_points.isUMat() && _normals.isUMat() &&
pyrPoints.isUMatVector() && pyrNormals.isUMatVector(),
ocl_buildPyramidPointsNormals(_points.getUMat(), _normals.getUMat(),
pyrPoints, pyrNormals,
levels));
int kp = pyrPoints.kind(), kn = pyrNormals.kind();
CV_Assert(kp == _InputArray::STD_ARRAY_MAT || kp == _InputArray::STD_VECTOR_MAT);
CV_Assert(kn == _InputArray::STD_ARRAY_MAT || kn == _InputArray::STD_VECTOR_MAT);
Mat p0 = _points.getMat(), n0 = _normals.getMat();
pyrPoints .create(levels, 1, POINT_TYPE);
pyrNormals.create(levels, 1, POINT_TYPE);
pyrPoints .getMatRef(0) = p0;
pyrNormals.getMatRef(0) = n0;
Size sz = _points.size();
for(int i = 1; i < levels; i++)
{
case cv::kinfu::Params::PlatformType::PLATFORM_CPU:
return cv::makePtr<FrameGeneratorCPU>();
case cv::kinfu::Params::PlatformType::PLATFORM_GPU:
return cv::makePtr<FrameGeneratorGPU>();
default:
return cv::Ptr<FrameGenerator>();
Points p1 = pyrPoints .getMat(i-1);
Normals n1 = pyrNormals.getMat(i-1);
sz.width /= 2; sz.height /= 2;
pyrPoints .create(sz, POINT_TYPE, i);
pyrNormals.create(sz, POINT_TYPE, i);
Points pd = pyrPoints. getMatRef(i);
Normals nd = pyrNormals.getMatRef(i);
pyrDownPointsNormals(p1, n1, pd, nd);
}
}

@ -70,52 +70,24 @@ inline ptype toPtype(const cv::Vec3f& x)
return ptype(x[0], x[1], x[2], 0);
}
typedef cv::Mat_< ptype > Points;
typedef Points Normals;
typedef cv::Mat_< depthType > Depth;
struct Frame
enum
{
public:
virtual void render(cv::OutputArray image, int level, cv::Affine3f lightPose) const = 0;
virtual void getDepth(cv::OutputArray depth) const = 0;
virtual ~Frame() { }
DEPTH_TYPE = DataType<depthType>::type,
POINT_TYPE = DataType<ptype >::type
};
struct FrameCPU : Frame
{
public:
FrameCPU() : points(), normals() { }
virtual ~FrameCPU() { }
virtual void render(cv::OutputArray image, int level, cv::Affine3f lightPose) const override;
virtual void getDepth(cv::OutputArray depth) const override;
std::vector<Points> points;
std::vector<Normals> normals;
Depth depthData;
};
struct FrameGPU : Frame
{
public:
virtual void render(cv::OutputArray image, int level, cv::Affine3f lightPose) const override;
virtual void getDepth(cv::OutputArray depth) const override;
virtual ~FrameGPU() { }
};
typedef cv::Mat_< ptype > Points;
typedef Points Normals;
struct FrameGenerator
{
public:
virtual cv::Ptr<Frame> operator ()() const = 0;
virtual void operator() (cv::Ptr<Frame> frame, cv::InputArray depth, const cv::kinfu::Intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize) const = 0;
virtual void operator() (cv::Ptr<Frame> frame, cv::InputArray points, cv::InputArray normals, int levels) const = 0;
virtual ~FrameGenerator() {}
};
typedef cv::Mat_< depthType > Depth;
cv::Ptr<FrameGenerator> makeFrameGenerator(cv::kinfu::Params::PlatformType t);
void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray image, cv::Affine3f lightPose);
void makeFrameFromDepth(InputArray depth, OutputArray pyrPoints, OutputArray pyrNormals,
const Intr intr, int levels, float depthFactor,
float sigmaDepth, float sigmaSpatial, int kernelSize);
void buildPyramidPointsNormals(InputArray _points, InputArray _normals,
OutputArrayOfArrays pyrPoints, OutputArrayOfArrays pyrNormals,
int levels);
} // namespace kinfu
} // namespace cv

@ -1490,21 +1490,10 @@ Size FastICPOdometry::prepareFrameCache(Ptr<OdometryFrame>& frame, int cacheType
checkDepth(frame->depth, frame->depth.size());
// mask isn't used by FastICP
Ptr<FrameGenerator> fg = makeFrameGenerator(Params::PlatformType::PLATFORM_CPU);
Ptr<FrameCPU> f = (*fg)().dynamicCast<FrameCPU>();
Intr intr(cameraMatrix);
float depthFactor = 1.f; // user should rescale depth manually
(*fg)(f, frame->depth, intr, (int)iterCounts.total(), depthFactor,
sigmaDepth, sigmaSpatial, kernelSize);
frame->pyramidCloud.clear();
frame->pyramidNormals.clear();
for(size_t i = 0; i < f->points.size(); i++)
{
frame->pyramidCloud.push_back(f->points[i]);
frame->pyramidNormals.push_back(f->normals[i]);
}
makeFrameFromDepth(frame->depth, frame->pyramidCloud, frame->pyramidNormals, intr, (int)iterCounts.total(),
depthFactor, sigmaDepth, sigmaSpatial, kernelSize);
return frame->depth.size();
}
@ -1526,22 +1515,16 @@ bool FastICPOdometry::computeImpl(const Ptr<OdometryFrame>& srcFrame,
{
kinfu::Intr intr(cameraMatrix);
std::vector<int> iterations = iterCounts;
Ptr<kinfu::ICP> icp = kinfu::makeICP(kinfu::Params::PlatformType::PLATFORM_CPU,
intr,
Ptr<kinfu::ICP> icp = kinfu::makeICP(intr,
iterations,
angleThreshold,
maxDistDiff);
// KinFu's ICP calculates transformation from new frame to old one (src to dst)
Affine3f transform;
Ptr<FrameCPU> srcF = makePtr<FrameCPU>(), dstF = makePtr<FrameCPU>();
for(size_t i = 0; i < srcFrame->pyramidCloud.size(); i++)
{
srcF-> points.push_back(srcFrame->pyramidCloud [i]);
srcF->normals.push_back(srcFrame->pyramidNormals[i]);
dstF-> points.push_back(dstFrame->pyramidCloud [i]);
dstF->normals.push_back(dstFrame->pyramidNormals[i]);
}
bool result = icp->estimateTransform(transform, dstF, srcF);
bool result = icp->estimateTransform(transform,
dstFrame->pyramidCloud, dstFrame->pyramidNormals,
srcFrame->pyramidCloud, srcFrame->pyramidNormals);
Rt.create(Size(4, 4), CV_64FC1);
Mat(Matx44d(transform.matrix)).copyTo(Rt.getMat());

@ -0,0 +1,239 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
#define UTSIZE 27
typedef float4 ptype;
/*
Calculate an upper triangle of Ab matrix then reduce it across workgroup
*/
inline void calcAb7(__global const char * oldPointsptr,
int oldPoints_step, int oldPoints_offset,
__global const char * oldNormalsptr,
int oldNormals_step, int oldNormals_offset,
const int2 oldSize,
__global const char * newPointsptr,
int newPoints_step, int newPoints_offset,
__global const char * newNormalsptr,
int newNormals_step, int newNormals_offset,
const int2 newSize,
const float16 poseMatrix,
const float2 fxy,
const float2 cxy,
const float sqDistanceThresh,
const float minCos,
float* ab7
)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if(x >= newSize.x || y >= newSize.y)
return;
// coord-independent constants
const float3 poseRot0 = poseMatrix.s012;
const float3 poseRot1 = poseMatrix.s456;
const float3 poseRot2 = poseMatrix.s89a;
const float3 poseTrans = poseMatrix.s37b;
const float2 oldEdge = (float2)(oldSize.x - 1, oldSize.y - 1);
__global const ptype* newPtsRow = (__global const ptype*)(newPointsptr +
newPoints_offset +
y*newPoints_step);
__global const ptype* newNrmRow = (__global const ptype*)(newNormalsptr +
newNormals_offset +
y*newNormals_step);
float3 newP = newPtsRow[x].xyz;
float3 newN = newNrmRow[x].xyz;
if(any(isnan(newP)) || any(isnan(newN)))
return;
//transform to old coord system
newP = (float3)(dot(newP, poseRot0),
dot(newP, poseRot1),
dot(newP, poseRot2)) + poseTrans;
newN = (float3)(dot(newN, poseRot0),
dot(newN, poseRot1),
dot(newN, poseRot2));
//find correspondence by projecting the point
float2 oldCoords = (newP.xy/newP.z)*fxy+cxy;
if(!(all(oldCoords >= 0.f) && all(oldCoords < oldEdge)))
return;
// bilinearly interpolate oldPts and oldNrm under oldCoords point
float3 oldP, oldN;
float2 ip = floor(oldCoords);
float2 t = oldCoords - ip;
int xi = ip.x, yi = ip.y;
__global const ptype* prow0 = (__global const ptype*)(oldPointsptr +
oldPoints_offset +
(yi+0)*oldPoints_step);
__global const ptype* prow1 = (__global const ptype*)(oldPointsptr +
oldPoints_offset +
(yi+1)*oldPoints_step);
float3 p00 = prow0[xi+0].xyz;
float3 p01 = prow0[xi+1].xyz;
float3 p10 = prow1[xi+0].xyz;
float3 p11 = prow1[xi+1].xyz;
// NaN check is done later
__global const ptype* nrow0 = (__global const ptype*)(oldNormalsptr +
oldNormals_offset +
(yi+0)*oldNormals_step);
__global const ptype* nrow1 = (__global const ptype*)(oldNormalsptr +
oldNormals_offset +
(yi+1)*oldNormals_step);
float3 n00 = nrow0[xi+0].xyz;
float3 n01 = nrow0[xi+1].xyz;
float3 n10 = nrow1[xi+0].xyz;
float3 n11 = nrow1[xi+1].xyz;
// NaN check is done later
float3 p0 = mix(p00, p01, t.x);
float3 p1 = mix(p10, p11, t.x);
oldP = mix(p0, p1, t.y);
float3 n0 = mix(n00, n01, t.x);
float3 n1 = mix(n10, n11, t.x);
oldN = mix(n0, n1, t.y);
if(any(isnan(oldP)) || any(isnan(oldN)))
return;
//filter by distance
float3 diff = newP - oldP;
if(dot(diff, diff) > sqDistanceThresh)
return;
//filter by angle
if(fabs(dot(newN, oldN)) < minCos)
return;
// build point-wise vector ab = [ A | b ]
float3 VxN = cross(newP, oldN);
float ab[7] = {VxN.x, VxN.y, VxN.z, oldN.x, oldN.y, oldN.z, -dot(oldN, diff)};
for(int i = 0; i < 7; i++)
ab7[i] = ab[i];
}
__kernel void getAb(__global const char * oldPointsptr,
int oldPoints_step, int oldPoints_offset,
__global const char * oldNormalsptr,
int oldNormals_step, int oldNormals_offset,
const int2 oldSize,
__global const char * newPointsptr,
int newPoints_step, int newPoints_offset,
__global const char * newNormalsptr,
int newNormals_step, int newNormals_offset,
const int2 newSize,
const float16 poseMatrix,
const float2 fxy,
const float2 cxy,
const float sqDistanceThresh,
const float minCos,
__local float * reducebuf,
__global char* groupedSumptr,
int groupedSum_step, int groupedSum_offset
)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if(x >= newSize.x || y >= newSize.y)
return;
const int gx = get_group_id(0);
const int gy = get_group_id(1);
const int gw = get_num_groups(0);
const int gh = get_num_groups(1);
const int lx = get_local_id(0);
const int ly = get_local_id(1);
const int lw = get_local_size(0);
const int lh = get_local_size(1);
const int lsz = lw*lh;
const int lid = lx + ly*lw;
float ab[7];
for(int i = 0; i < 7; i++)
ab[i] = 0;
calcAb7(oldPointsptr,
oldPoints_step, oldPoints_offset,
oldNormalsptr,
oldNormals_step, oldNormals_offset,
oldSize,
newPointsptr,
newPoints_step, newPoints_offset,
newNormalsptr,
newNormals_step, newNormals_offset,
newSize,
poseMatrix,
fxy, cxy,
sqDistanceThresh,
minCos,
ab);
// build point-wise upper-triangle matrix [ab^T * ab] w/o last row
// which is [A^T*A | A^T*b]
// and gather sum
__local float* upperTriangle = reducebuf + lid*UTSIZE;
int pos = 0;
for(int i = 0; i < 6; i++)
{
for(int j = i; j < 7; j++)
{
upperTriangle[pos++] = ab[i]*ab[j];
}
}
// reduce upperTriangle to local mem
// maxStep = ctz(lsz), ctz isn't supported on CUDA devices
const int c = clz(lsz & -lsz);
const int maxStep = c ? 31 - c : c;
for(int nstep = 1; nstep <= maxStep; nstep++)
{
if(lid % (1 << nstep) == 0)
{
__local float* rto = reducebuf + UTSIZE*lid;
__local float* rfrom = reducebuf + UTSIZE*(lid+(1 << (nstep-1)));
for(int i = 0; i < UTSIZE; i++)
rto[i] += rfrom[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
// here group sum should be in reducebuf[0...UTSIZE]
if(lid == 0)
{
__global float* groupedRow = (__global float*)(groupedSumptr +
groupedSum_offset +
gy*groupedSum_step);
for(int i = 0; i < UTSIZE; i++)
groupedRow[gx*UTSIZE + i] = reducebuf[i];
}
}

@ -0,0 +1,287 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
inline float3 reproject(float3 p, float2 fxyinv, float2 cxy)
{
float2 pp = p.z*(p.xy - cxy)*fxyinv;
return (float3)(pp, p.z);
}
typedef float4 ptype;
__kernel void computePointsNormals(__global char * pointsptr,
int points_step, int points_offset,
__global char * normalsptr,
int normals_step, int normals_offset,
__global const char * depthptr,
int depth_step, int depth_offset,
int depth_rows, int depth_cols,
const float2 fxyinv,
const float2 cxy,
const float dfac
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x >= depth_cols || y >= depth_rows)
return;
__global const float* row0 = (__global const float*)(depthptr + depth_offset +
(y+0)*depth_step);
__global const float* row1 = (__global const float*)(depthptr + depth_offset +
(y+1)*depth_step);
float d00 = row0[x];
float z00 = d00*dfac;
float3 p00 = (float3)(convert_float2((int2)(x, y)), z00);
float3 v00 = reproject(p00, fxyinv, cxy);
float3 p = nan((uint)0), n = nan((uint)0);
if(x < depth_cols - 1 && y < depth_rows - 1)
{
float d01 = row0[x+1];
float d10 = row1[x];
float z01 = d01*dfac;
float z10 = d10*dfac;
if(z00 != 0 && z01 != 0 && z10 != 0)
{
float3 p01 = (float3)(convert_float2((int2)(x+1, y+0)), z01);
float3 p10 = (float3)(convert_float2((int2)(x+0, y+1)), z10);
float3 v01 = reproject(p01, fxyinv, cxy);
float3 v10 = reproject(p10, fxyinv, cxy);
float3 vec = cross(v01 - v00, v10 - v00);
n = - normalize(vec);
p = v00;
}
}
__global float* pts = (__global float*)(pointsptr + points_offset + y*points_step + x*sizeof(ptype));
__global float* nrm = (__global float*)(normalsptr + normals_offset + y*normals_step + x*sizeof(ptype));
vstore4((ptype)(p, 0), 0, pts);
vstore4((ptype)(n, 0), 0, nrm);
}
__kernel void pyrDownBilateral(__global const char * depthptr,
int depth_step, int depth_offset,
int depth_rows, int depth_cols,
__global char * depthDownptr,
int depthDown_step, int depthDown_offset,
int depthDown_rows, int depthDown_cols,
const float sigma
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x >= depthDown_cols || y >= depthDown_rows)
return;
const float sigma3 = sigma*3;
const int D = 5;
__global const float* srcCenterRow = (__global const float*)(depthptr + depth_offset +
(2*y)*depth_step);
float center = srcCenterRow[2*x];
int sx = max(0, 2*x - D/2), ex = min(2*x - D/2 + D, depth_cols-1);
int sy = max(0, 2*y - D/2), ey = min(2*y - D/2 + D, depth_rows-1);
float sum = 0;
int count = 0;
for(int iy = sy; iy < ey; iy++)
{
__global const float* srcRow = (__global const float*)(depthptr + depth_offset +
(iy)*depth_step);
for(int ix = sx; ix < ex; ix++)
{
float val = srcRow[ix];
if(fabs(val - center) < sigma3)
{
sum += val; count++;
}
}
}
__global float* downRow = (__global float*)(depthDownptr + depthDown_offset +
y*depthDown_step + x*sizeof(float));
*downRow = (count == 0) ? 0 : sum/convert_float(count);
}
//TODO: remove bilateral when OpenCV performs 32f bilat with OpenCL
__kernel void customBilateral(__global const char * srcptr,
int src_step, int src_offset,
__global char * dstptr,
int dst_step, int dst_offset,
const int2 frameSize,
const int kernelSize,
const float sigma_spatial2_inv_half,
const float sigma_depth2_inv_half
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x >= frameSize.x || y >= frameSize.y)
return;
__global const float* srcCenterRow = (__global const float*)(srcptr + src_offset +
y*src_step);
float value = srcCenterRow[x];
int tx = min (x - kernelSize / 2 + kernelSize, frameSize.x - 1);
int ty = min (y - kernelSize / 2 + kernelSize, frameSize.y - 1);
float sum1 = 0;
float sum2 = 0;
for (int cy = max (y - kernelSize / 2, 0); cy < ty; ++cy)
{
__global const float* srcRow = (__global const float*)(srcptr + src_offset +
cy*src_step);
for (int cx = max (x - kernelSize / 2, 0); cx < tx; ++cx)
{
float depth = srcRow[cx];
float space2 = convert_float((x - cx) * (x - cx) + (y - cy) * (y - cy));
float color2 = (value - depth) * (value - depth);
float weight = native_exp (-(space2 * sigma_spatial2_inv_half +
color2 * sigma_depth2_inv_half));
sum1 += depth * weight;
sum2 += weight;
}
}
__global float* dst = (__global float*)(dstptr + dst_offset +
y*dst_step + x*sizeof(float));
*dst = sum1/sum2;
}
__kernel void pyrDownPointsNormals(__global const char * pptr,
int p_step, int p_offset,
__global const char * nptr,
int n_step, int n_offset,
__global char * pdownptr,
int pdown_step, int pdown_offset,
__global char * ndownptr,
int ndown_step, int ndown_offset,
const int2 downSize
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x >= downSize.x || y >= downSize.y)
return;
float3 point = nan((uint)0), normal = nan((uint)0);
__global const ptype* pUpRow0 = (__global const ptype*)(pptr + p_offset + (2*y )*p_step);
__global const ptype* pUpRow1 = (__global const ptype*)(pptr + p_offset + (2*y+1)*p_step);
float3 d00 = pUpRow0[2*x ].xyz;
float3 d01 = pUpRow0[2*x+1].xyz;
float3 d10 = pUpRow1[2*x ].xyz;
float3 d11 = pUpRow1[2*x+1].xyz;
if(!(any(isnan(d00)) || any(isnan(d01)) ||
any(isnan(d10)) || any(isnan(d11))))
{
point = (d00 + d01 + d10 + d11)*0.25f;
__global const ptype* nUpRow0 = (__global const ptype*)(nptr + n_offset + (2*y )*n_step);
__global const ptype* nUpRow1 = (__global const ptype*)(nptr + n_offset + (2*y+1)*n_step);
float3 n00 = nUpRow0[2*x ].xyz;
float3 n01 = nUpRow0[2*x+1].xyz;
float3 n10 = nUpRow1[2*x ].xyz;
float3 n11 = nUpRow1[2*x+1].xyz;
normal = (n00 + n01 + n10 + n11)*0.25f;
}
__global ptype* pts = (__global ptype*)(pdownptr + pdown_offset + y*pdown_step);
__global ptype* nrm = (__global ptype*)(ndownptr + ndown_offset + y*ndown_step);
pts[x] = (ptype)(point, 0);
nrm[x] = (ptype)(normal, 0);
}
typedef char4 pixelType;
// 20 is fixed power
float specPow20(float x)
{
float x2 = x*x;
float x5 = x2*x2*x;
float x10 = x5*x5;
float x20 = x10*x10;
return x20;
}
__kernel void render(__global const char * pointsptr,
int points_step, int points_offset,
__global const char * normalsptr,
int normals_step, int normals_offset,
__global char * imgptr,
int img_step, int img_offset,
const int2 frameSize,
const float4 lightPt
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x >= frameSize.x || y >= frameSize.y)
return;
__global const ptype* ptsRow = (__global const ptype*)(pointsptr + points_offset + y*points_step + x*sizeof(ptype));
__global const ptype* nrmRow = (__global const ptype*)(normalsptr + normals_offset + y*normals_step + x*sizeof(ptype));
float3 p = (*ptsRow).xyz;
float3 n = (*nrmRow).xyz;
pixelType color;
if(any(isnan(p)))
{
color = (pixelType)(0, 32, 0, 0);
}
else
{
const float Ka = 0.3f; //ambient coeff
const float Kd = 0.5f; //diffuse coeff
const float Ks = 0.2f; //specular coeff
//const int sp = 20; //specular power, fixed in specPow20()
const float Ax = 1.f; //ambient color, can be RGB
const float Dx = 1.f; //diffuse color, can be RGB
const float Sx = 1.f; //specular color, can be RGB
const float Lx = 1.f; //light color
float3 l = normalize(lightPt.xyz - p);
float3 v = normalize(-p);
float3 r = normalize(2.f*n*dot(n, l) - l);
float val = (Ax*Ka*Dx + Lx*Kd*Dx*max(0.f, dot(n, l)) +
Lx*Ks*Sx*specPow20(max(0.f, dot(r, v))));
uchar ix = convert_uchar(val*255.f);
color = (pixelType)(ix, ix, ix, 0);
}
__global char* imgRow = (__global char*)(imgptr + img_offset + y*img_step + x*sizeof(pixelType));
vstore4(color, 0, imgRow);
}

@ -0,0 +1,825 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
__kernel void integrate(__global const char * depthptr,
int depth_step, int depth_offset,
int depth_rows, int depth_cols,
__global float2 * volumeptr,
const float16 vol2camMatrix,
const float voxelSize,
const int4 volResolution4,
const int4 volDims4,
const float2 fxy,
const float2 cxy,
const float dfac,
const float truncDist,
const int maxWeight)
{
int x = get_global_id(0);
int y = get_global_id(1);
const int3 volResolution = volResolution4.xyz;
if(x >= volResolution.x || y >= volResolution.y)
return;
// coord-independent constants
const int3 volDims = volDims4.xyz;
const float2 limits = (float2)(depth_cols-1, depth_rows-1);
const float4 vol2cam0 = vol2camMatrix.s0123;
const float4 vol2cam1 = vol2camMatrix.s4567;
const float4 vol2cam2 = vol2camMatrix.s89ab;
const float truncDistInv = 1.f/truncDist;
// optimization of camSpace transformation (vector addition instead of matmul at each z)
float4 inPt = (float4)(x*voxelSize, y*voxelSize, 0, 1);
float3 basePt = (float3)(dot(vol2cam0, inPt),
dot(vol2cam1, inPt),
dot(vol2cam2, inPt));
float3 camSpacePt = basePt;
// zStep == vol2cam*(float3(x, y, 1)*voxelSize) - basePt;
float3 zStep = ((float3)(vol2cam0.z, vol2cam1.z, vol2cam2.z))*voxelSize;
int volYidx = x*volDims.x + y*volDims.y;
int startZ, endZ;
if(fabs(zStep.z) > 1e-5)
{
int baseZ = convert_int(-basePt.z / zStep.z);
if(zStep.z > 0)
{
startZ = baseZ;
endZ = volResolution.z;
}
else
{
startZ = 0;
endZ = baseZ;
}
}
else
{
if(basePt.z > 0)
{
startZ = 0; endZ = volResolution.z;
}
else
{
// z loop shouldn't be performed
//startZ = endZ = 0;
return;
}
}
startZ = max(0, startZ);
endZ = min(volResolution.z, endZ);
for(int z = startZ; z < endZ; z++)
{
// optimization of the following:
//float3 camSpacePt = vol2cam * ((float3)(x, y, z)*voxelSize);
camSpacePt += zStep;
if(camSpacePt.z <= 0)
continue;
float3 camPixVec = camSpacePt / camSpacePt.z;
float2 projected = mad(camPixVec.xy, fxy, cxy);
float v;
// bilinearly interpolate depth at projected
if(all(projected >= 0) && all(projected < limits))
{
float2 ip = floor(projected);
int xi = ip.x, yi = ip.y;
__global const float* row0 = (__global const float*)(depthptr + depth_offset +
(yi+0)*depth_step);
__global const float* row1 = (__global const float*)(depthptr + depth_offset +
(yi+1)*depth_step);
float v00 = row0[xi+0];
float v01 = row0[xi+1];
float v10 = row1[xi+0];
float v11 = row1[xi+1];
float4 vv = (float4)(v00, v01, v10, v11);
// assume correct depth is positive
if(all(vv > 0))
{
float2 t = projected - ip;
float2 vf = mix(vv.xz, vv.yw, t.x);
v = mix(vf.s0, vf.s1, t.y);
}
else
continue;
}
else
continue;
if(v == 0)
continue;
float pixNorm = length(camPixVec);
// difference between distances of point and of surface to camera
float sdf = pixNorm*(v*dfac - camSpacePt.z);
// possible alternative is:
// float sdf = length(camSpacePt)*(v*dfac/camSpacePt.z - 1.0);
if(sdf >= -truncDist)
{
float tsdf = fmin(1.0f, sdf * truncDistInv);
int volIdx = volYidx + z*volDims.z;
float2 voxel = volumeptr[volIdx];
float value = voxel.s0;
int weight = as_int(voxel.s1);
// update TSDF
value = (value*weight + tsdf) / (weight + 1);
weight = min(weight + 1, maxWeight);
voxel.s0 = value;
voxel.s1 = as_float(weight);
volumeptr[volIdx] = voxel;
}
}
}
inline float interpolateVoxel(float3 p, __global const float2* volumePtr,
int3 volDims, int8 neighbourCoords)
{
float3 fip = floor(p);
int3 ip = convert_int3(fip);
float3 t = p - fip;
int3 cmul = volDims*ip;
int coordBase = cmul.x + cmul.y + cmul.z;
int nco[8];
vstore8(neighbourCoords + coordBase, 0, nco);
float vaz[8];
for(int i = 0; i < 8; i++)
vaz[i] = volumePtr[nco[i]].s0;
float8 vz = vload8(0, vaz);
float4 vy = mix(vz.s0246, vz.s1357, t.z);
float2 vx = mix(vy.s02, vy.s13, t.y);
return mix(vx.s0, vx.s1, t.x);
}
inline float3 getNormalVoxel(float3 p, __global const float2* volumePtr,
int3 volResolution, int3 volDims, int8 neighbourCoords)
{
if(any(p < 1) || any(p >= convert_float3(volResolution - 2)))
return nan((uint)0);
float3 fip = floor(p);
int3 ip = convert_int3(fip);
float3 t = p - fip;
int3 cmul = volDims*ip;
int coordBase = cmul.x + cmul.y + cmul.z;
int nco[8];
vstore8(neighbourCoords + coordBase, 0, nco);
int arDims[3];
vstore3(volDims, 0, arDims);
float an[3];
for(int c = 0; c < 3; c++)
{
int dim = arDims[c];
float vaz[8];
for(int i = 0; i < 8; i++)
vaz[i] = volumePtr[nco[i] + dim].s0 -
volumePtr[nco[i] - dim].s0;
float8 vz = vload8(0, vaz);
float4 vy = mix(vz.s0246, vz.s1357, t.z);
float2 vx = mix(vy.s02, vy.s13, t.y);
an[c] = mix(vx.s0, vx.s1, t.x);
}
//gradientDeltaFactor is fixed at 1.0 of voxel size
return fast_normalize(vload3(0, an));
}
typedef float4 ptype;
__kernel void raycast(__global char * pointsptr,
int points_step, int points_offset,
__global char * normalsptr,
int normals_step, int normals_offset,
const int2 frameSize,
__global const float2 * volumeptr,
__global const float * vol2camptr,
__global const float * cam2volptr,
const float2 fixy,
const float2 cxy,
const float4 boxDown4,
const float4 boxUp4,
const float tstep,
const float voxelSize,
const int4 volResolution4,
const int4 volDims4,
const int8 neighbourCoords
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x >= frameSize.x || y >= frameSize.y)
return;
// coordinate-independent constants
__global const float* cm = cam2volptr;
const float3 camRot0 = vload4(0, cm).xyz;
const float3 camRot1 = vload4(1, cm).xyz;
const float3 camRot2 = vload4(2, cm).xyz;
const float3 camTrans = (float3)(cm[3], cm[7], cm[11]);
__global const float* vm = vol2camptr;
const float3 volRot0 = vload4(0, vm).xyz;
const float3 volRot1 = vload4(1, vm).xyz;
const float3 volRot2 = vload4(2, vm).xyz;
const float3 volTrans = (float3)(vm[3], vm[7], vm[11]);
const float3 boxDown = boxDown4.xyz;
const float3 boxUp = boxUp4.xyz;
const int3 volDims = volDims4.xyz;
const int3 volResolution = volResolution4.xyz;
const float invVoxelSize = native_recip(voxelSize);
// kernel itself
float3 point = nan((uint)0);
float3 normal = nan((uint)0);
float3 orig = camTrans;
// get direction through pixel in volume space:
// 1. reproject (x, y) on projecting plane where z = 1.f
float3 planed = (float3)(((float2)(x, y) - cxy)*fixy, 1.f);
// 2. rotate to volume space
planed = (float3)(dot(planed, camRot0),
dot(planed, camRot1),
dot(planed, camRot2));
// 3. normalize
float3 dir = fast_normalize(planed);
// compute intersection of ray with all six bbox planes
float3 rayinv = native_recip(dir);
float3 tbottom = rayinv*(boxDown - orig);
float3 ttop = rayinv*(boxUp - orig);
// re-order intersections to find smallest and largest on each axis
float3 minAx = min(ttop, tbottom);
float3 maxAx = max(ttop, tbottom);
// near clipping plane
const float clip = 0.f;
float tmin = max(max(max(minAx.x, minAx.y), max(minAx.x, minAx.z)), clip);
float tmax = min(min(maxAx.x, maxAx.y), min(maxAx.x, maxAx.z));
// precautions against getting coordinates out of bounds
tmin = tmin + tstep;
tmax = tmax - tstep;
if(tmin < tmax)
{
// interpolation optimized a little
orig *= invVoxelSize;
dir *= invVoxelSize;
float3 rayStep = dir*tstep;
float3 next = (orig + dir*tmin);
float f = interpolateVoxel(next, volumeptr, volDims, neighbourCoords);
float fnext = f;
// raymarch
int steps = 0;
int nSteps = floor(native_divide(tmax - tmin, tstep));
bool stop = false;
for(int i = 0; i < nSteps; i++)
{
// fix for wrong steps counting
if(!stop)
{
next += rayStep;
// fetch voxel
int3 ip = convert_int3(round(next));
int3 cmul = ip*volDims;
int idx = cmul.x + cmul.y + cmul.z;
fnext = volumeptr[idx].s0;
if(fnext != f)
{
fnext = interpolateVoxel(next, volumeptr, volDims, neighbourCoords);
// when ray crosses a surface
if(signbit(f) != signbit(fnext))
{
stop = true; continue;
}
f = fnext;
}
steps++;
}
}
// if ray penetrates a surface from outside
// linearly interpolate t between two f values
if(f > 0 && fnext < 0)
{
float3 tp = next - rayStep;
float ft = interpolateVoxel(tp, volumeptr, volDims, neighbourCoords);
float ftdt = interpolateVoxel(next, volumeptr, volDims, neighbourCoords);
// float t = tmin + steps*tstep;
// float ts = t - tstep*ft/(ftdt - ft);
float ts = tmin + tstep*(steps - native_divide(ft, ftdt - ft));
// avoid division by zero
if(!isnan(ts) && !isinf(ts))
{
float3 pv = orig + dir*ts;
float3 nv = getNormalVoxel(pv, volumeptr, volResolution, volDims, neighbourCoords);
if(!any(isnan(nv)))
{
//convert pv and nv to camera space
normal = (float3)(dot(nv, volRot0),
dot(nv, volRot1),
dot(nv, volRot2));
// interpolation optimized a little
pv *= voxelSize;
point = (float3)(dot(pv, volRot0),
dot(pv, volRot1),
dot(pv, volRot2)) + volTrans;
}
}
}
}
__global float* pts = (__global float*)(pointsptr + points_offset + y*points_step + x*sizeof(ptype));
__global float* nrm = (__global float*)(normalsptr + normals_offset + y*normals_step + x*sizeof(ptype));
vstore4((float4)(point, 0), 0, pts);
vstore4((float4)(normal, 0), 0, nrm);
}
__kernel void getNormals(__global const char * pointsptr,
int points_step, int points_offset,
__global char * normalsptr,
int normals_step, int normals_offset,
const int2 frameSize,
__global const float2* volumeptr,
__global const float * volPoseptr,
__global const float * invPoseptr,
const float voxelSizeInv,
const int4 volResolution4,
const int4 volDims4,
const int8 neighbourCoords
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x >= frameSize.x || y >= frameSize.y)
return;
// coordinate-independent constants
__global const float* vp = volPoseptr;
const float3 volRot0 = vload4(0, vp).xyz;
const float3 volRot1 = vload4(1, vp).xyz;
const float3 volRot2 = vload4(2, vp).xyz;
const float3 volTrans = (float3)(vp[3], vp[7], vp[11]);
__global const float* iv = invPoseptr;
const float3 invRot0 = vload4(0, iv).xyz;
const float3 invRot1 = vload4(1, iv).xyz;
const float3 invRot2 = vload4(2, iv).xyz;
const float3 invTrans = (float3)(iv[3], iv[7], iv[11]);
const int3 volResolution = volResolution4.xyz;
const int3 volDims = volDims4.xyz;
// kernel itself
__global const ptype* ptsRow = (__global const ptype*)(pointsptr +
points_offset +
y*points_step);
float3 p = ptsRow[x].xyz;
float3 n = nan((uint)0);
if(!any(isnan(p)))
{
float3 voxPt = (float3)(dot(p, invRot0),
dot(p, invRot1),
dot(p, invRot2)) + invTrans;
voxPt = voxPt * voxelSizeInv;
n = getNormalVoxel(voxPt, volumeptr, volResolution, volDims, neighbourCoords);
n = (float3)(dot(n, volRot0),
dot(n, volRot1),
dot(n, volRot2));
}
__global float* nrm = (__global float*)(normalsptr +
normals_offset +
y*normals_step +
x*sizeof(ptype));
vstore4((float4)(n, 0), 0, nrm);
}
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
struct CoordReturn
{
bool result;
float3 point;
float3 normal;
};
inline struct CoordReturn coord(int x, int y, int z, float3 V, float v0, int axis,
__global const float2* volumeptr,
int3 volResolution, int3 volDims,
int8 neighbourCoords,
float voxelSize, float voxelSizeInv,
const float3 volRot0,
const float3 volRot1,
const float3 volRot2,
const float3 volTrans,
bool needNormals,
bool scan
)
{
struct CoordReturn cr;
// 0 for x, 1 for y, 2 for z
bool limits = false;
int3 shift;
float Vc = 0.f;
if(axis == 0)
{
shift = (int3)(1, 0, 0);
limits = (x + 1 < volResolution.x);
Vc = V.x;
}
if(axis == 1)
{
shift = (int3)(0, 1, 0);
limits = (y + 1 < volResolution.y);
Vc = V.y;
}
if(axis == 2)
{
shift = (int3)(0, 0, 1);
limits = (z + 1 < volResolution.z);
Vc = V.z;
}
if(limits)
{
int3 ip = ((int3)(x, y, z)) + shift;
int3 cmul = ip*volDims;
int idx = cmul.x + cmul.y + cmul.z;
float2 voxel = volumeptr[idx].s0;
float vd = voxel.s0;
int weight = as_int(voxel.s1);
if(weight != 0 && vd != 1.f)
{
if((v0 > 0 && vd < 0) || (v0 < 0 && vd > 0))
{
// calc actual values or estimate amount of space
if(!scan)
{
// linearly interpolate coordinate
float Vn = Vc + voxelSize;
float dinv = 1.f/(fabs(v0)+fabs(vd));
float inter = (Vc*fabs(vd) + Vn*fabs(v0))*dinv;
float3 p = (float3)(shift.x ? inter : V.x,
shift.y ? inter : V.y,
shift.z ? inter : V.z);
cr.point = (float3)(dot(p, volRot0),
dot(p, volRot1),
dot(p, volRot2)) + volTrans;
if(needNormals)
{
float3 nv = getNormalVoxel(p * voxelSizeInv,
volumeptr, volResolution, volDims, neighbourCoords);
cr.normal = (float3)(dot(nv, volRot0),
dot(nv, volRot1),
dot(nv, volRot2));
}
}
cr.result = true;
return cr;
}
}
}
cr.result = false;
return cr;
}
__kernel void scanSize(__global const float2* volumeptr,
const int4 volResolution4,
const int4 volDims4,
const int8 neighbourCoords,
__global const float * volPoseptr,
const float voxelSize,
const float voxelSizeInv,
__local int* reducebuf,
__global char* groupedSumptr,
int groupedSum_slicestep,
int groupedSum_step, int groupedSum_offset
)
{
const int3 volDims = volDims4.xyz;
const int3 volResolution = volResolution4.xyz;
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
bool validVoxel = true;
if(x >= volResolution.x || y >= volResolution.y || z >= volResolution.z)
validVoxel = false;
const int gx = get_group_id(0);
const int gy = get_group_id(1);
const int gz = get_group_id(2);
const int lx = get_local_id(0);
const int ly = get_local_id(1);
const int lz = get_local_id(2);
const int lw = get_local_size(0);
const int lh = get_local_size(1);
const int ld = get_local_size(2);
const int lsz = lw*lh*ld;
const int lid = lx + ly*lw + lz*lw*lh;
// coordinate-independent constants
__global const float* vp = volPoseptr;
const float3 volRot0 = vload4(0, vp).xyz;
const float3 volRot1 = vload4(1, vp).xyz;
const float3 volRot2 = vload4(2, vp).xyz;
const float3 volTrans = (float3)(vp[3], vp[7], vp[11]);
// kernel itself
int npts = 0;
if(validVoxel)
{
int3 ip = (int3)(x, y, z);
int3 cmul = ip*volDims;
int idx = cmul.x + cmul.y + cmul.z;
float2 voxel = volumeptr[idx].s0;
float value = voxel.s0;
int weight = as_int(voxel.s1);
// if voxel is not empty
if(weight != 0 && value != 1.f)
{
float3 V = (((float3)(x, y, z)) + 0.5f)*voxelSize;
#pragma unroll
for(int i = 0; i < 3; i++)
{
struct CoordReturn cr;
cr = coord(x, y, z, V, value, i,
volumeptr, volResolution, volDims,
neighbourCoords,
voxelSize, voxelSizeInv,
volRot0, volRot1, volRot2, volTrans,
false, true);
if(cr.result)
{
npts++;
}
}
}
}
// reducebuf keeps counters for each thread
reducebuf[lid] = npts;
// reduce counter to local mem
// maxStep = ctz(lsz), ctz isn't supported on CUDA devices
const int c = clz(lsz & -lsz);
const int maxStep = c ? 31 - c : c;
for(int nstep = 1; nstep <= maxStep; nstep++)
{
if(lid % (1 << nstep) == 0)
{
int rto = lid;
int rfrom = lid + (1 << (nstep-1));
reducebuf[rto] += reducebuf[rfrom];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if(lid == 0)
{
__global int* groupedRow = (__global int*)(groupedSumptr +
groupedSum_offset +
gy*groupedSum_step +
gz*groupedSum_slicestep);
groupedRow[gx] = reducebuf[0];
}
}
__kernel void fillPtsNrm(__global const float2* volumeptr,
const int4 volResolution4,
const int4 volDims4,
const int8 neighbourCoords,
__global const float * volPoseptr,
const float voxelSize,
const float voxelSizeInv,
const int needNormals,
__local float* localbuf,
volatile __global int* atomicCtr,
__global const char* groupedSumptr,
int groupedSum_slicestep,
int groupedSum_step, int groupedSum_offset,
__global char * pointsptr,
int points_step, int points_offset,
__global char * normalsptr,
int normals_step, int normals_offset
)
{
const int3 volDims = volDims4.xyz;
const int3 volResolution = volResolution4.xyz;
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
bool validVoxel = true;
if(x >= volResolution.x || y >= volResolution.y || z >= volResolution.z)
validVoxel = false;
const int gx = get_group_id(0);
const int gy = get_group_id(1);
const int gz = get_group_id(2);
__global int* groupedRow = (__global int*)(groupedSumptr +
groupedSum_offset +
gy*groupedSum_step +
gz*groupedSum_slicestep);
// this group contains 0 pts, skip it
int nptsGroup = groupedRow[gx];
if(nptsGroup == 0)
return;
const int lx = get_local_id(0);
const int ly = get_local_id(1);
const int lz = get_local_id(2);
const int lw = get_local_size(0);
const int lh = get_local_size(1);
const int ld = get_local_size(2);
const int lsz = lw*lh*ld;
const int lid = lx + ly*lw + lz*lw*lh;
// coordinate-independent constants
__global const float* vp = volPoseptr;
const float3 volRot0 = vload4(0, vp).xyz;
const float3 volRot1 = vload4(1, vp).xyz;
const float3 volRot2 = vload4(2, vp).xyz;
const float3 volTrans = (float3)(vp[3], vp[7], vp[11]);
// kernel itself
int npts = 0;
float3 parr[3], narr[3];
if(validVoxel)
{
int3 ip = (int3)(x, y, z);
int3 cmul = ip*volDims;
int idx = cmul.x + cmul.y + cmul.z;
float2 voxel = volumeptr[idx].s0;
float value = voxel.s0;
int weight = as_int(voxel.s1);
// if voxel is not empty
if(weight != 0 && value != 1.f)
{
float3 V = (((float3)(x, y, z)) + 0.5f)*voxelSize;
#pragma unroll
for(int i = 0; i < 3; i++)
{
struct CoordReturn cr;
cr = coord(x, y, z, V, value, i,
volumeptr, volResolution, volDims,
neighbourCoords,
voxelSize, voxelSizeInv,
volRot0, volRot1, volRot2, volTrans,
needNormals, false);
if(cr.result)
{
parr[npts] = cr.point;
narr[npts] = cr.normal;
npts++;
}
}
}
}
// 4 floats per point or normal
const int elemStep = 4;
__local float* normAddr;
__local int localCtr;
if(lid == 0)
localCtr = 0;
// push all pts (and nrm) from private array to local mem
int privateCtr = 0;
barrier(CLK_LOCAL_MEM_FENCE);
privateCtr = atomic_add(&localCtr, npts);
barrier(CLK_LOCAL_MEM_FENCE);
for(int i = 0; i < npts; i++)
{
__local float* addr = localbuf + (privateCtr+i)*elemStep;
vstore4((float4)(parr[i], 0), 0, addr);
}
if(needNormals)
{
normAddr = localbuf + localCtr*elemStep;
for(int i = 0; i < npts; i++)
{
__local float* addr = normAddr + (privateCtr+i)*elemStep;
vstore4((float4)(narr[i], 0), 0, addr);
}
}
// debugging purposes
if(lid == 0)
{
if(localCtr != nptsGroup)
{
printf("!!! fetchPointsNormals result may be incorrect, npts != localCtr at %3d %3d %3d: %3d vs %3d\n",
gx, gy, gz, localCtr, nptsGroup);
}
}
// copy local buffer to global mem
__local int whereToWrite;
if(lid == 0)
whereToWrite = atomic_add(atomicCtr, localCtr);
barrier(CLK_GLOBAL_MEM_FENCE);
event_t ev[2];
int evn = 0;
// points and normals are 1-column matrices
__global float* pts = (__global float*)(pointsptr +
points_offset +
whereToWrite*points_step);
ev[evn++] = async_work_group_copy(pts, localbuf, localCtr*elemStep, 0);
if(needNormals)
{
__global float* nrm = (__global float*)(normalsptr +
normals_offset +
whereToWrite*normals_step);
ev[evn++] = async_work_group_copy(nrm, normAddr, localCtr*elemStep, 0);
}
wait_group_events(evn, ev);
}

@ -13,6 +13,8 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/ocl.hpp"
#include "opencl_kernels_rgbd.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/rgbd.hpp"

File diff suppressed because it is too large Load Diff

@ -18,12 +18,12 @@ class TSDFVolume
{
public:
// dimension in voxels, size in meters
TSDFVolume(int _res, float _size, cv::Affine3f _pose, float _truncDist, int _maxWeight,
float _raycastStepFactor);
TSDFVolume(Point3i _res, float _voxelSize, cv::Affine3f _pose, float _truncDist, int _maxWeight,
float _raycastStepFactor, bool zFirstMemOrder = true);
virtual void integrate(cv::Ptr<Frame> depth, float depthFactor, cv::Affine3f cameraPose, cv::kinfu::Intr intrinsics) = 0;
virtual void raycast(cv::Affine3f cameraPose, cv::kinfu::Intr intrinsics, cv::Size frameSize, int pyramidLevels,
cv::Ptr<FrameGenerator> frameGenerator, cv::Ptr<Frame> frame) const = 0;
virtual void integrate(InputArray _depth, float depthFactor, cv::Affine3f cameraPose, cv::kinfu::Intr intrinsics) = 0;
virtual void raycast(cv::Affine3f cameraPose, cv::kinfu::Intr intrinsics, cv::Size frameSize,
cv::OutputArray points, cv::OutputArray normals) const = 0;
virtual void fetchPointsNormals(cv::OutputArray points, cv::OutputArray normals) const = 0;
virtual void fetchNormals(cv::InputArray points, cv::OutputArray _normals) const = 0;
@ -32,14 +32,20 @@ public:
virtual ~TSDFVolume() { }
float edgeSize;
int edgeResolution;
float voxelSize;
float voxelSizeInv;
Point3i volResolution;
int maxWeight;
cv::Affine3f pose;
float raycastStepFactor;
Point3f volSize;
float truncDist;
Vec4i volDims;
Vec8i neighbourCoords;
};
cv::Ptr<TSDFVolume> makeTSDFVolume(cv::kinfu::Params::PlatformType t,
int _res, float _size, cv::Affine3f _pose, float _truncDist, int _maxWeight,
cv::Ptr<TSDFVolume> makeTSDFVolume(Point3i _res, float _voxelSize, cv::Affine3f _pose, float _truncDist, int _maxWeight,
float _raycastStepFactor);
} // namespace kinfu

@ -50,38 +50,6 @@ namespace rgbd
namespace kinfu {
#if PRINT_TIME
ScopeTime::ScopeTime(std::string name_, bool _enablePrint) :
name(name_), enablePrint(_enablePrint)
{
start = (double)cv::getTickCount();
nested++;
}
ScopeTime::~ScopeTime()
{
double time_ms = ((double)cv::getTickCount() - start)*1000.0/cv::getTickFrequency();
if(enablePrint)
{
std::string spaces(nested, '-');
std::cout << spaces << "Time(" << name << ") = " << time_ms << " ms" << std::endl;
}
nested--;
}
int ScopeTime::nested = 0;
#else
ScopeTime::ScopeTime(std::string /*name_*/, bool /*_enablePrint = true*/)
{ }
ScopeTime::~ScopeTime()
{ }
#endif
} // namespace kinfu
} // namespace cv

@ -47,14 +47,14 @@ rescaleDepthTemplated<double>(const Mat& in, Mat& out)
namespace kinfu {
// Print execution times of each block marked with ScopeTime
#define PRINT_TIME 0
// One place to turn intrinsics on and off
#define USE_INTRINSICS CV_SIMD128
typedef float depthType;
const float qnan = std::numeric_limits<float>::quiet_NaN();
const cv::Vec3f nan3(qnan, qnan, qnan);
#if CV_SIMD128
#if USE_INTRINSICS
const cv::v_float32x4 nanv(qnan, qnan, qnan, qnan);
#endif
@ -63,26 +63,13 @@ inline bool isNaN(cv::Point3f p)
return (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z));
}
#if CV_SIMD128
#if USE_INTRINSICS
static inline bool isNaN(const cv::v_float32x4& p)
{
return cv::v_check_any(p != p);
}
#endif
struct ScopeTime
{
ScopeTime(std::string name_, bool _enablePrint = true);
~ScopeTime();
#if PRINT_TIME
static int nested;
const std::string name;
const bool enablePrint;
double start;
#endif
};
/** @brief Camera intrinsics */
struct Intr
{
@ -143,6 +130,16 @@ struct Intr
float fx, fy, cx, cy;
};
inline size_t roundDownPow2(size_t x)
{
size_t shift = 0;
while(x != 0)
{
shift++; x >>= 1;
}
return (size_t)(1ULL << (shift-1));
}
} // namespace kinfu
} // namespace cv

@ -90,10 +90,18 @@ struct RenderInvoker : ParallelLoopBody
float depthFactor;
};
struct CubeSpheresScene
struct Scene
{
virtual ~Scene() {}
static Ptr<Scene> create(int nScene, Size sz, Matx33f _intr, float _depthFactor);
virtual Mat depth(Affine3f pose) = 0;
virtual std::vector<Affine3f> getPoses() = 0;
};
struct CubeSpheresScene : Scene
{
const int framesPerCycle = 32;
const int nCycles = 1;
const float nCycles = 0.25f;
const Affine3f startPose = Affine3f(Vec3f(-0.5f, 0.f, 0.f), Vec3f(2.1f, 1.4f, -2.1f));
CubeSpheresScene(Size sz, Matx33f _intr, float _depthFactor) :
@ -125,7 +133,7 @@ struct CubeSpheresScene
return res;
}
Mat depth(Affine3f pose)
Mat depth(Affine3f pose) override
{
Mat_<float> frame(frameSize);
Reprojector reproj(intr);
@ -136,10 +144,10 @@ struct CubeSpheresScene
return frame;
}
std::vector<Affine3f> getPoses()
std::vector<Affine3f> getPoses() override
{
std::vector<Affine3f> poses;
for(int i = 0; i < framesPerCycle*nCycles; i++)
for(int i = 0; i < (int)(framesPerCycle*nCycles); i++)
{
float angle = (float)(CV_2PI*i/framesPerCycle);
Affine3f pose;
@ -160,10 +168,10 @@ struct CubeSpheresScene
};
struct RotatingScene
struct RotatingScene : Scene
{
const int framesPerCycle = 64;
const int nCycles = 1;
const int framesPerCycle = 32;
const float nCycles = 0.5f;
const Affine3f startPose = Affine3f(Vec3f(-1.f, 0.f, 0.f), Vec3f(1.5f, 2.f, -1.5f));
RotatingScene(Size sz, Matx33f _intr, float _depthFactor) :
@ -221,7 +229,7 @@ struct RotatingScene
return res;
}
Mat depth(Affine3f pose)
Mat depth(Affine3f pose) override
{
Mat_<float> frame(frameSize);
Reprojector reproj(intr);
@ -232,7 +240,7 @@ struct RotatingScene
return frame;
}
std::vector<Affine3f> getPoses()
std::vector<Affine3f> getPoses() override
{
std::vector<Affine3f> poses;
for(int i = 0; i < framesPerCycle*nCycles; i++)
@ -258,24 +266,42 @@ struct RotatingScene
Mat_<float> RotatingScene::randTexture(256, 256);
Ptr<Scene> Scene::create(int nScene, Size sz, Matx33f _intr, float _depthFactor)
{
if(nScene == 0)
return makePtr<RotatingScene>(sz, _intr, _depthFactor);
else
return makePtr<CubeSpheresScene>(sz, _intr, _depthFactor);
}
static const bool display = false;
TEST( KinectFusion, lowDense )
void flyTest(bool hiDense, bool inequal)
{
Ptr<kinfu::Params> params = kinfu::Params::coarseParams();
Ptr<kinfu::Params> params;
if(hiDense)
params = kinfu::Params::defaultParams();
else
params = kinfu::Params::coarseParams();
if(inequal)
{
params->volumeDims[0] += 32;
params->volumeDims[1] -= 32;
}
RotatingScene scene(params->frameSize, params->intr, params->depthFactor);
Ptr<Scene> scene = Scene::create(hiDense, params->frameSize, params->intr, params->depthFactor);
Ptr<kinfu::KinFu> kf = kinfu::KinFu::create(params);
std::vector<Affine3f> poses = scene.getPoses();
std::vector<Affine3f> poses = scene->getPoses();
Affine3f startPoseGT = poses[0], startPoseKF;
Affine3f pose, kfPose;
for(size_t i = 0; i < poses.size(); i++)
{
pose = poses[i];
Mat depth = scene.depth(pose);
Mat depth = scene->depth(pose);
ASSERT_TRUE(kf->update(depth));
@ -295,46 +321,35 @@ TEST( KinectFusion, lowDense )
}
}
ASSERT_LT(cv::norm(kfPose.rvec() - pose.rvec()), 0.01);
ASSERT_LT(cv::norm(kfPose.translation() - pose.translation()), 0.1);
double rvecThreshold = hiDense ? 0.01 : 0.02;
ASSERT_LT(cv::norm(kfPose.rvec() - pose.rvec()), rvecThreshold);
double poseThreshold = hiDense ? 0.03 : 0.1;
ASSERT_LT(cv::norm(kfPose.translation() - pose.translation()), poseThreshold);
}
TEST( KinectFusion, highDense )
TEST( KinectFusion, lowDense )
{
Ptr<kinfu::Params> params = kinfu::Params::defaultParams();
CubeSpheresScene scene(params->frameSize, params->intr, params->depthFactor);
Ptr<kinfu::KinFu> kf = kinfu::KinFu::create(params);
std::vector<Affine3f> poses = scene.getPoses();
Affine3f startPoseGT = poses[0], startPoseKF;
Affine3f pose, kfPose;
for(size_t i = 0; i < poses.size(); i++)
{
pose = poses[i];
Mat depth = scene.depth(pose);
ASSERT_TRUE(kf->update(depth));
kfPose = kf->getPose();
if(i == 0)
startPoseKF = kfPose;
flyTest(false, false);
}
pose = ( startPoseGT.inv() * pose )*startPoseKF;
TEST( KinectFusion, highDense )
{
flyTest(true, false);
}
if(display)
{
imshow("depth", depth*(1.f/params->depthFactor/4.f));
Mat rendered;
kf->render(rendered);
imshow("render", rendered);
waitKey(10);
}
}
TEST( KinectFusion, inequal )
{
flyTest(false, true);
}
ASSERT_LT(cv::norm(kfPose.rvec() - pose.rvec()), 0.01);
ASSERT_LT(cv::norm(kfPose.translation() - pose.translation()), 0.03);
#ifdef HAVE_OPENCL
TEST( KinectFusion, OCL )
{
cv::ocl::setUseOpenCL(false);
flyTest(false, false);
cv::ocl::setUseOpenCL(true);
flyTest(false, false);
}
#endif
}} // namespace

@ -235,10 +235,10 @@ void CV_OdometryTest::run(int)
// On each iteration an input frame is warped using generated transformation.
// Odometry is run on the following pair: the original frame and the warped one.
// Comparing a computed transformation with an applied one we compute 2 errors:
// better_1time_count - count of poses which error is less than ground thrush pose,
// better_5times_count - count of poses which error is 5 times less than ground thrush pose.
// better_1time_count - count of poses which error is less than ground truth pose,
// better_5times_count - count of poses which error is 5 times less than ground truth pose.
int iterCount = 100;
int better_1time_count = 0;
int better_1time_count = 0;
int better_5times_count = 0;
for(int iter = 0; iter < iterCount; iter++)
{
@ -267,7 +267,6 @@ void CV_OdometryTest::run(int)
waitKey();
#endif
// compare rotation
double rdiffnorm = cv::norm(rvec - calcRvec),
rnorm = cv::norm(rvec);

@ -13,6 +13,10 @@
#include <opencv2/rgbd.hpp>
#include <opencv2/calib3d.hpp>
#ifdef HAVE_OPENCL
#include <opencv2/core/ocl.hpp>
#endif
namespace opencv_test {
using namespace cv::rgbd;
}

Loading…
Cancel
Save