Merge pull request #21095 from No-Plane-Cannot-Be-Detected:next_SIMD

Accelerated 3D point cloud Farthest Point Sampling calculation using SIMD.

* Add several 3D point cloud sampling functions: Random, VoxelGrid, FarthestPoint.

* Made some code detail changes and exposed the random number generator parameters at the interface.

* Add simple tests for sampling.

* Modify interface output parameters.

* Modify interface return value.

* The sampling test is modified for the new changes of function interface.

* Improved test of VoxelGridFilterSampling

* Improved test of VoxelGridFilterSampling and FPS.

* Add test for the dist_lower_limit arguments of FPS function.

* Optimization function _getMatFromInputArray.

* Optimize the code style and some details according to the suggestions.

* Clear prefix cv: in the source code.

* Change the initialization of Mat in the sampling test.

* 1. Unified code style
2. Optimize randomSampling method

* 1. Optimize code comments.
2. Remove unused local variables.

* Rebuild the structure of the test, make the test case more reliable, and change the code style.

* Update test_sampling.cpp

Fix a warning.

* Use SIMD to optimize the farthest point sampling.

* Optimize the farthest point sampling SIMD code.

* 1. remove `\n` from the ptcloud.hpp comment.
2. updated the default value of the argument arrangement_of_points in the _getMatFromInputArray function in ptcloud_utils.hpp from 0 to 1, since the latter is more commonly used (such arrangement is easier for SIMD acceleration).
3. removed two functions in ptcloud_utils.hpp that were not used.

* Remove the <br> in the comment.

* Fix whitespace issues.
pull/21201/head
Ruan 4 years ago committed by GitHub
parent 1470f90c2a
commit 0cf0a5e9d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 14
      modules/3d/include/opencv2/3d/ptcloud.hpp
  2. 78
      modules/3d/src/ptcloud/ptcloud_utils.hpp
  3. 198
      modules/3d/src/ptcloud/sampling.cpp

@ -18,7 +18,7 @@ namespace cv {
* point cloud data, in each voxel (i.e., 3D box), all the points present will be
* approximated (i.e., downsampled) with the point closest to their centroid.
*
* @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector<int> or std::vector<char> etc.)
* @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector<int> or std::vector<char> etc.)
* sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected.
* @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN.
* @param length Grid length.
@ -63,17 +63,17 @@ CV_EXPORTS void randomSampling(OutputArray sampled_pts, InputArray input_pts,
* @brief Point cloud sampling by Farthest Point Sampling(FPS).
*
* FPS Algorithm:
* Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit*
* Initialize: Set sampled point cloud S to the empty set
* Step:
* + Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit*
* + Initialize: Set sampled point cloud S to the empty set
* + Step:
* 1. Randomly take a seed point from C and take it from C to S;
* 2. Find a point in C that is the farthest away from S and take it from C to S;
* (The distance from point to set S is the smallest distance from point to all points in S)
* 3. Repeat *step 2* until the farthest distance of the point in C from S
* is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*.
* Output: Sampled point cloud S
* + Output: Sampled point cloud S
*
* @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector<int> or std::vector<char> etc.)
* @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector<int> or std::vector<char> etc.)
* sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected.
* @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN.
* @param sampled_pts_size The desired point cloud size after sampling.
@ -89,7 +89,7 @@ CV_EXPORTS int farthestPointSampling(OutputArray sampled_point_flags, InputArray
/**
* @overload
*
* @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector<int> or std::vector<char> etc.)
* @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector<int> or std::vector<char> etc.)
* sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected.
* @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN.
* @param sampled_scale Range (0, 1), the percentage of the sampled point cloud to the original size,

@ -0,0 +1,78 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_3D_PTCLOUD_UTILS_HPP
#define OPENCV_3D_PTCLOUD_UTILS_HPP
namespace cv {
/**
* @brief Get cv::Mat with Nx3 or 3xN type CV_32FC1 from cv::InputArray.
*
* @param input_pts Point cloud xyz data.
* @param[out] mat Point cloud xyz data in cv::Mat with Nx3 or 3xN type CV_32FC1.
* @param arrangement_of_points The arrangement of point data in the matrix,
* 0 by row (Nx3, [x1, y1, z1, ..., xn, yn, zn]),
* 1 by column (3xN, [x1, ..., xn, y1, ..., yn, z1, ..., zn]).
* @param clone_data Flag to specify whether data cloning is mandatory.
*
* @note The following cases will clone data even if flag clone_data is false:
* 1. Data is discontinuous in memory
* 2. Data type is not float
* 3. The original arrangement of data is not the same as the expected new arrangement.
* For example, transforming from
* Nx3(x1, y1, z1, ..., xn, yn, zn) to 3xN(x1, ..., xn, y1, ..., yn, z1, ..., zn)
*
*/
inline void _getMatFromInputArray(InputArray input_pts, Mat &mat,
int arrangement_of_points = 1, bool clone_data = false)
{
CV_Check(input_pts.dims(), input_pts.dims() < 3,
"Only support data with dimension less than 3.");
// Guaranteed data can construct N×3 point clouds
int rows = input_pts.rows(), cols = input_pts.cols(), channels = input_pts.channels();
size_t total = rows * cols * channels;
CV_Check(total, total % 3 == 0,
"total = input_pts.rows() * input_pts.cols() * input_pts.channels() must be an integer multiple of 3");
/**
Layout of point cloud data in memory space.
arrangement 0 : x1, y1, z1, ..., xn, yn, zn
For example, the input is std::vector<Point3d>, or std::vector<int>,
or cv::Mat with type N×1 CV_32FC3
arrangement 1 : x1, ..., xn, y1, ..., yn, z1, ..., zn
For example, the input is cv::Mat with type 3×N CV_32FC1
*/
int ori_arrangement = (channels == 1 && rows == 3 && cols != 3) ? 1 : 0;
// Convert to single channel without copying the data.
mat = ori_arrangement == 0 ? input_pts.getMat().reshape(1, (int) (total / 3))
: input_pts.getMat();
if (ori_arrangement != arrangement_of_points)
{
Mat tmp;
transpose(mat, tmp);
swap(mat, tmp);
}
if (mat.type() != CV_32F)
{
Mat tmp;
mat.convertTo(tmp, CV_32F); // Use float to store data
swap(mat, tmp);
}
if (clone_data || (!mat.isContinuous()))
{
mat = mat.clone();
}
}
}
#endif //OPENCV_3D_PTCLOUD_UTILS_HPP

@ -5,6 +5,7 @@
#include "../precomp.hpp"
#include "opencv2/3d/ptcloud.hpp"
#include "ptcloud_utils.hpp"
#include <unordered_map>
namespace cv {
@ -20,52 +21,8 @@ static inline void _swap(Tp &n, Tp &m)
m = tmp;
}
/**
* Get cv::Mat with type N×3 CV_32FC1 from cv::InputArray
* Use different interpretations for the same memory data.
*/
static inline void _getMatFromInputArray(InputArray input_pts, Mat &mat)
{
CV_Check(input_pts.dims(), input_pts.dims() < 3,
"Only support data with dimension less than 3.");
// Guaranteed data can construct N×3 point clouds
int rows = input_pts.rows(), cols = input_pts.cols(), channels = input_pts.channels();
size_t total = rows * cols * channels;
CV_Check(total, total % 3 == 0,
"total = input_pts.rows() * input_pts.cols() * input_pts.channels() must be an integer multiple of 3");
if (channels == 1 && rows == 3 && cols != 3)
{
// Layout of point cloud data in memory space:
// x1, ..., xn, y1, ..., yn, z1, ..., zn
// For example, the input is cv::Mat with type 3×N CV_32FC1
transpose(input_pts, mat);
}
else
{
// Layout of point cloud data in memory space:
// x1, y1, z1, ..., xn, yn, zn
// For example, the input is std::vector<Point3d>, or std::vector<int>, or cv::Mat with type N×1 CV_32FC3
mat = input_pts.getMat().reshape(1, (int) (total / 3));
}
if (mat.type() != CV_32F)
{
Mat tmp;
mat.convertTo(tmp, CV_32F); // Use float to store data
swap(mat, tmp);
}
if (!mat.isContinuous())
{
mat = mat.clone();
}
}
int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts,
const float length, const float width, const float height)
const float length, const float width, const float height)
{
CV_CheckGT(length, 0.0f, "Invalid length of grid");
CV_CheckGT(width, 0.0f, "Invalid width of grid");
@ -73,7 +30,7 @@ int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts,
// Get input point cloud data
Mat ori_pts;
_getMatFromInputArray(input_pts, ori_pts);
_getMatFromInputArray(input_pts, ori_pts, 0);
const int ori_pts_size = ori_pts.rows;
@ -184,7 +141,8 @@ int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts,
return pts_new_size;
} // voxelGrid()
void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sampled_pts_size, RNG *rng)
void
randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sampled_pts_size, RNG *rng)
{
CV_CheckGT(sampled_pts_size, 0, "The point cloud size after sampling must be greater than 0.");
CV_CheckDepth(sampled_pts.depth(), sampled_pts.isMat() || sampled_pts.depth() == CV_32F,
@ -192,7 +150,7 @@ void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sam
// Get input point cloud data
Mat ori_pts;
_getMatFromInputArray(input_pts, ori_pts);
_getMatFromInputArray(input_pts, ori_pts, 0);
const int ori_pts_size = ori_pts.rows;
CV_CheckLT(sampled_pts_size, ori_pts_size,
@ -229,45 +187,51 @@ void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sam
} // randomSampling()
void randomSampling(OutputArray sampled_pts, InputArray input_pts, const float sampled_scale, RNG *rng)
void
randomSampling(OutputArray sampled_pts, InputArray input_pts, const float sampled_scale, RNG *rng)
{
CV_CheckGT(sampled_scale, 0.0f, "The point cloud sampled scale must greater than 0.");
CV_CheckLT(sampled_scale, 1.0f, "The point cloud sampled scale must less than 1.");
Mat ori_pts;
_getMatFromInputArray(input_pts, ori_pts);
_getMatFromInputArray(input_pts, ori_pts, 0);
randomSampling(sampled_pts, input_pts, cvCeil(sampled_scale * ori_pts.rows), rng);
} // randomSampling()
/**
* FPS Algorithm:
* Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit*
* Initialize: Set sampled point cloud S to the empty set
* Step:
* 1. Randomly take a seed point from C and take it from C to S;
* 2. Find a point in C that is the farthest away from S and take it from C to S;
* (The distance from point to set S is the smallest distance from point to all points in S)
* 3. Repeat *step 2* until the farthest distance of the point in C from S
* is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*.
* Output: Sampled point cloud S
* FPS Algorithm:\n
* Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit* \n
* Initialize: Set sampled point cloud S to the empty set \n
* Step: \n
* 1. Randomly take a seed point from C and take it from C to S; \n
* 2. Find a point in C that is the farthest away from S and take it from C to S; \n
* (The distance from point to set S is the smallest distance from point to all points in S) \n
* 3. Repeat *step 2* until the farthest distance of the point in C from S \n
* is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*. \n
* Output: Sampled point cloud S \n
*/
int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
const int sampled_pts_size, const float dist_lower_limit, RNG *rng)
const int sampled_pts_size, const float dist_lower_limit, RNG *rng)
{
CV_CheckGT(sampled_pts_size, 0, "The point cloud size after sampling must be greater than 0.");
CV_CheckGE(dist_lower_limit, 0.0f, "The distance lower bound must be greater than or equal to 0.");
CV_CheckGE(dist_lower_limit, 0.0f,
"The distance lower bound must be greater than or equal to 0.");
// Get input point cloud data
Mat ori_pts;
_getMatFromInputArray(input_pts, ori_pts);
// In order to keep the points continuous in memory (which allows better support for SIMD),
// the position of the points may be changed, data copying is mandatory
_getMatFromInputArray(input_pts, ori_pts, 1, true);
const int ori_pts_size = ori_pts.rows;
const int ori_pts_size = ori_pts.rows * ori_pts.cols / 3;
CV_CheckLT(sampled_pts_size, ori_pts_size,
"The sampled point cloud size must be smaller than the original point cloud size.");
// idx arr [ . . . . . . . . . ] --- sampling ---> [ . . . . . . . . . ]
// C S | C
// _idxs records the original location/id of the point
AutoBuffer<int> _idxs(ori_pts_size);
// _dist_square records the distance from point(in C) to S
AutoBuffer<float> _dist_square(ori_pts_size);
int *idxs = _idxs.data();
float *dist_square = _dist_square.data();
@ -277,39 +241,85 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
dist_square[i] = FLT_MAX;
}
// Randomly take a seed point from C and put it into S
int seed = (int)((rng? rng->next(): theRNG().next()) % ori_pts_size);
// Randomly take a seed point from C and take it from C to S
int seed = (int) ((rng ? rng->next() : theRNG().next()) % ori_pts_size);
idxs[0] = seed;
idxs[seed] = 0;
std::vector<char> _sampled_point_flags(ori_pts_size, 0);
_sampled_point_flags[seed] = 1;
// Pointer (base address) of access point data x,y,z
float *const ori_pts_ptr_x = (float *) ori_pts.data;
float *const ori_pts_ptr_y = ori_pts_ptr_x + ori_pts_size;
float *const ori_pts_ptr_z = ori_pts_ptr_y + ori_pts_size;
// Ensure that the point(in C) data x,y,z is continuous in the memory respectively
_swap(ori_pts_ptr_x[seed], ori_pts_ptr_x[0]);
_swap(ori_pts_ptr_y[seed], ori_pts_ptr_y[0]);
_swap(ori_pts_ptr_z[seed], ori_pts_ptr_z[0]);
float *const ori_pts_ptr = (float *) ori_pts.data;
int sampled_cnt = 1;
const float dist_lower_limit_square = dist_lower_limit * dist_lower_limit;
while (sampled_cnt < sampled_pts_size)
{
int last_pt = sampled_cnt - 1;
float *const last_pt_ptr_base = ori_pts_ptr + 3 * idxs[last_pt];
float last_pt_x = last_pt_ptr_base[0], last_pt_y = last_pt_ptr_base[1], last_pt_z = last_pt_ptr_base[2];
float last_pt_x = ori_pts_ptr_x[last_pt];
float last_pt_y = ori_pts_ptr_y[last_pt];
float last_pt_z = ori_pts_ptr_z[last_pt];
// Calculate the distance from point in C to set S
float max_dist_square = 0;
for (int i = sampled_cnt; i < ori_pts_size; ++i)
int next_pt = sampled_cnt;
int i = sampled_cnt;
#ifdef CV_SIMD
int k = (ori_pts_size - sampled_cnt) / v_float32::nlanes;
int end = sampled_cnt + v_float32::nlanes * k;
v_float32 v_last_p_x = vx_setall_f32(last_pt_x);
v_float32 v_last_p_y = vx_setall_f32(last_pt_y);
v_float32 v_last_p_z = vx_setall_f32(last_pt_z);
for (; i < end; i += v_float32::nlanes)
{
float *const ori_pts_ptr_base = ori_pts_ptr + 3 * idxs[i];
float x_diff = (last_pt_x - ori_pts_ptr_base[0]);
float y_diff = (last_pt_y - ori_pts_ptr_base[1]);
float z_diff = (last_pt_z - ori_pts_ptr_base[2]);
float next_dist_square = x_diff * x_diff + y_diff * y_diff + z_diff * z_diff;
if (next_dist_square < dist_square[i])
v_float32 vx_diff = v_last_p_x - vx_load(ori_pts_ptr_x + i);
v_float32 vy_diff = v_last_p_y - vx_load(ori_pts_ptr_y + i);
v_float32 vz_diff = v_last_p_z - vx_load(ori_pts_ptr_z + i);
v_float32 v_next_dist_square =
vx_diff * vx_diff + vy_diff * vy_diff + vz_diff * vz_diff;
// Update the distance from the points(in C) to S
float *dist_square_ptr = dist_square + i;
v_float32 v_dist_square = vx_load(dist_square_ptr);
v_dist_square = v_min(v_dist_square, v_next_dist_square);
vx_store(dist_square_ptr, v_dist_square);
// Find a point in C that is the farthest away from S and take it from C to S
if (v_check_any(v_dist_square > vx_setall_f32(max_dist_square)))
{
dist_square[i] = next_dist_square;
for (int m = 0; m < v_float32::nlanes; ++m)
{
if (dist_square_ptr[m] > max_dist_square)
{
next_pt = i + m;
max_dist_square = dist_square_ptr[m];
}
}
}
}
#endif
for (; i < ori_pts_size; ++i)
{
float x_diff = (last_pt_x - ori_pts_ptr_x[i]);
float y_diff = (last_pt_y - ori_pts_ptr_y[i]);
float z_diff = (last_pt_z - ori_pts_ptr_z[i]);
float next_dist_square = x_diff * x_diff + y_diff * y_diff + z_diff * z_diff;
// Update the distance from the points(in C) to S
dist_square[i] = std::min(dist_square[i], next_dist_square);
// Find a point in C that is the farthest away from S and take it from C to S
if (dist_square[i] > max_dist_square)
{
last_pt = i;
next_pt = i;
max_dist_square = dist_square[i];
}
}
@ -318,26 +328,40 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
if (max_dist_square < dist_lower_limit_square)
break;
_sampled_point_flags[idxs[last_pt]] = 1;
_swap(idxs[sampled_cnt], idxs[last_pt]);
_swap(dist_square[sampled_cnt], dist_square[last_pt]);
// Take point next_pt from C to S
_swap(idxs[next_pt], idxs[sampled_cnt]);
_swap(dist_square[next_pt], dist_square[sampled_cnt]);
// Ensure that the point(in C) data x,y,z is continuous in the memory respectively
_swap(ori_pts_ptr_x[next_pt], ori_pts_ptr_x[sampled_cnt]);
_swap(ori_pts_ptr_y[next_pt], ori_pts_ptr_y[sampled_cnt]);
_swap(ori_pts_ptr_z[next_pt], ori_pts_ptr_z[sampled_cnt]);
++sampled_cnt;
}
std::vector<char> _sampled_point_flags(ori_pts_size, 0);
for (int j = 0; j < sampled_cnt; ++j)
{
_sampled_point_flags[idxs[j]] = 1;
}
Mat(_sampled_point_flags).copyTo(sampled_point_flags);
return sampled_cnt;
} // farthestPointSampling()
int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
const float sampled_scale, const float dist_lower_limit, RNG *rng)
const float sampled_scale, const float dist_lower_limit, RNG *rng)
{
CV_CheckGT(sampled_scale, 0.0f, "The point cloud sampled scale must greater than 0.");
CV_CheckLT(sampled_scale, 1.0f, "The point cloud sampled scale must less than 1.");
CV_CheckGE(dist_lower_limit, 0.0f, "The distance lower bound must be greater than or equal to 0.");
CV_CheckGE(dist_lower_limit, 0.0f,
"The distance lower bound must be greater than or equal to 0.");
Mat ori_pts;
_getMatFromInputArray(input_pts, ori_pts);
_getMatFromInputArray(input_pts, ori_pts, 1);
return farthestPointSampling(sampled_point_flags, input_pts,
cvCeil(sampled_scale * ori_pts.rows), dist_lower_limit, rng);
cvCeil(sampled_scale * ori_pts.cols), dist_lower_limit, rng);
} // farthestPointSampling()
//! @} _3d

Loading…
Cancel
Save