From 0cf0a5e9d4627c83ee4dfd162e6c1361103b3c29 Mon Sep 17 00:00:00 2001
From: Ruan <47767371+Ryyyc@users.noreply.github.com>
Date: Tue, 30 Nov 2021 20:33:44 +0800
Subject: [PATCH] Merge pull request #21095 from
No-Plane-Cannot-Be-Detected:next_SIMD
Accelerated 3D point cloud Farthest Point Sampling calculation using SIMD.
* Add several 3D point cloud sampling functions: Random, VoxelGrid, FarthestPoint.
* Made some code detail changes and exposed the random number generator parameters at the interface.
* Add simple tests for sampling.
* Modify interface output parameters.
* Modify interface return value.
* The sampling test is modified for the new changes of function interface.
* Improved test of VoxelGridFilterSampling
* Improved test of VoxelGridFilterSampling and FPS.
* Add test for the dist_lower_limit arguments of FPS function.
* Optimization function _getMatFromInputArray.
* Optimize the code style and some details according to the suggestions.
* Clear prefix cv: in the source code.
* Change the initialization of Mat in the sampling test.
* 1. Unified code style
2. Optimize randomSampling method
* 1. Optimize code comments.
2. Remove unused local variables.
* Rebuild the structure of the test, make the test case more reliable, and change the code style.
* Update test_sampling.cpp
Fix a warning.
* Use SIMD to optimize the farthest point sampling.
* Optimize the farthest point sampling SIMD code.
* 1. remove `\n` from the ptcloud.hpp comment.
2. updated the default value of the argument arrangement_of_points in the _getMatFromInputArray function in ptcloud_utils.hpp from 0 to 1, since the latter is more commonly used (such arrangement is easier for SIMD acceleration).
3. removed two functions in ptcloud_utils.hpp that were not used.
* Remove the
in the comment.
* Fix whitespace issues.
---
modules/3d/include/opencv2/3d/ptcloud.hpp | 14 +-
modules/3d/src/ptcloud/ptcloud_utils.hpp | 78 +++++++++
modules/3d/src/ptcloud/sampling.cpp | 198 ++++++++++++----------
3 files changed, 196 insertions(+), 94 deletions(-)
create mode 100644 modules/3d/src/ptcloud/ptcloud_utils.hpp
diff --git a/modules/3d/include/opencv2/3d/ptcloud.hpp b/modules/3d/include/opencv2/3d/ptcloud.hpp
index d1dec17ca1..110c5049ae 100644
--- a/modules/3d/include/opencv2/3d/ptcloud.hpp
+++ b/modules/3d/include/opencv2/3d/ptcloud.hpp
@@ -18,7 +18,7 @@ namespace cv {
* point cloud data, in each voxel (i.e., 3D box), all the points present will be
* approximated (i.e., downsampled) with the point closest to their centroid.
*
- * @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector or std::vector etc.)
+ * @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector or std::vector etc.)
* sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected.
* @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN.
* @param length Grid length.
@@ -63,17 +63,17 @@ CV_EXPORTS void randomSampling(OutputArray sampled_pts, InputArray input_pts,
* @brief Point cloud sampling by Farthest Point Sampling(FPS).
*
* FPS Algorithm:
- * Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit*
- * Initialize: Set sampled point cloud S to the empty set
- * Step:
+ * + Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit*
+ * + Initialize: Set sampled point cloud S to the empty set
+ * + Step:
* 1. Randomly take a seed point from C and take it from C to S;
* 2. Find a point in C that is the farthest away from S and take it from C to S;
* (The distance from point to set S is the smallest distance from point to all points in S)
* 3. Repeat *step 2* until the farthest distance of the point in C from S
* is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*.
- * Output: Sampled point cloud S
+ * + Output: Sampled point cloud S
*
- * @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector or std::vector etc.)
+ * @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector or std::vector etc.)
* sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected.
* @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN.
* @param sampled_pts_size The desired point cloud size after sampling.
@@ -89,7 +89,7 @@ CV_EXPORTS int farthestPointSampling(OutputArray sampled_point_flags, InputArray
/**
* @overload
*
- * @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector or std::vector etc.)
+ * @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector or std::vector etc.)
* sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected.
* @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN.
* @param sampled_scale Range (0, 1), the percentage of the sampled point cloud to the original size,
diff --git a/modules/3d/src/ptcloud/ptcloud_utils.hpp b/modules/3d/src/ptcloud/ptcloud_utils.hpp
new file mode 100644
index 0000000000..28cf05b4f2
--- /dev/null
+++ b/modules/3d/src/ptcloud/ptcloud_utils.hpp
@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+
+#ifndef OPENCV_3D_PTCLOUD_UTILS_HPP
+#define OPENCV_3D_PTCLOUD_UTILS_HPP
+
+namespace cv {
+
+/**
+ * @brief Get cv::Mat with Nx3 or 3xN type CV_32FC1 from cv::InputArray.
+ *
+ * @param input_pts Point cloud xyz data.
+ * @param[out] mat Point cloud xyz data in cv::Mat with Nx3 or 3xN type CV_32FC1.
+ * @param arrangement_of_points The arrangement of point data in the matrix,
+ * 0 by row (Nx3, [x1, y1, z1, ..., xn, yn, zn]),
+ * 1 by column (3xN, [x1, ..., xn, y1, ..., yn, z1, ..., zn]).
+ * @param clone_data Flag to specify whether data cloning is mandatory.
+ *
+ * @note The following cases will clone data even if flag clone_data is false:
+ * 1. Data is discontinuous in memory
+ * 2. Data type is not float
+ * 3. The original arrangement of data is not the same as the expected new arrangement.
+ * For example, transforming from
+ * Nx3(x1, y1, z1, ..., xn, yn, zn) to 3xN(x1, ..., xn, y1, ..., yn, z1, ..., zn)
+ *
+ */
+inline void _getMatFromInputArray(InputArray input_pts, Mat &mat,
+ int arrangement_of_points = 1, bool clone_data = false)
+{
+ CV_Check(input_pts.dims(), input_pts.dims() < 3,
+ "Only support data with dimension less than 3.");
+
+ // Guaranteed data can construct N×3 point clouds
+ int rows = input_pts.rows(), cols = input_pts.cols(), channels = input_pts.channels();
+ size_t total = rows * cols * channels;
+ CV_Check(total, total % 3 == 0,
+ "total = input_pts.rows() * input_pts.cols() * input_pts.channels() must be an integer multiple of 3");
+
+ /**
+ Layout of point cloud data in memory space.
+ arrangement 0 : x1, y1, z1, ..., xn, yn, zn
+ For example, the input is std::vector, or std::vector,
+ or cv::Mat with type N×1 CV_32FC3
+ arrangement 1 : x1, ..., xn, y1, ..., yn, z1, ..., zn
+ For example, the input is cv::Mat with type 3×N CV_32FC1
+ */
+ int ori_arrangement = (channels == 1 && rows == 3 && cols != 3) ? 1 : 0;
+
+ // Convert to single channel without copying the data.
+ mat = ori_arrangement == 0 ? input_pts.getMat().reshape(1, (int) (total / 3))
+ : input_pts.getMat();
+
+ if (ori_arrangement != arrangement_of_points)
+ {
+ Mat tmp;
+ transpose(mat, tmp);
+ swap(mat, tmp);
+ }
+
+ if (mat.type() != CV_32F)
+ {
+ Mat tmp;
+ mat.convertTo(tmp, CV_32F); // Use float to store data
+ swap(mat, tmp);
+ }
+
+ if (clone_data || (!mat.isContinuous()))
+ {
+ mat = mat.clone();
+ }
+
+}
+
+}
+
+#endif //OPENCV_3D_PTCLOUD_UTILS_HPP
diff --git a/modules/3d/src/ptcloud/sampling.cpp b/modules/3d/src/ptcloud/sampling.cpp
index 5d0b13e58d..c4656221a7 100644
--- a/modules/3d/src/ptcloud/sampling.cpp
+++ b/modules/3d/src/ptcloud/sampling.cpp
@@ -5,6 +5,7 @@
#include "../precomp.hpp"
#include "opencv2/3d/ptcloud.hpp"
+#include "ptcloud_utils.hpp"
#include
namespace cv {
@@ -20,52 +21,8 @@ static inline void _swap(Tp &n, Tp &m)
m = tmp;
}
-/**
- * Get cv::Mat with type N×3 CV_32FC1 from cv::InputArray
- * Use different interpretations for the same memory data.
- */
-static inline void _getMatFromInputArray(InputArray input_pts, Mat &mat)
-{
- CV_Check(input_pts.dims(), input_pts.dims() < 3,
- "Only support data with dimension less than 3.");
-
- // Guaranteed data can construct N×3 point clouds
- int rows = input_pts.rows(), cols = input_pts.cols(), channels = input_pts.channels();
- size_t total = rows * cols * channels;
- CV_Check(total, total % 3 == 0,
- "total = input_pts.rows() * input_pts.cols() * input_pts.channels() must be an integer multiple of 3");
-
- if (channels == 1 && rows == 3 && cols != 3)
- {
- // Layout of point cloud data in memory space:
- // x1, ..., xn, y1, ..., yn, z1, ..., zn
- // For example, the input is cv::Mat with type 3×N CV_32FC1
- transpose(input_pts, mat);
- }
- else
- {
- // Layout of point cloud data in memory space:
- // x1, y1, z1, ..., xn, yn, zn
- // For example, the input is std::vector, or std::vector, or cv::Mat with type N×1 CV_32FC3
- mat = input_pts.getMat().reshape(1, (int) (total / 3));
- }
-
- if (mat.type() != CV_32F)
- {
- Mat tmp;
- mat.convertTo(tmp, CV_32F); // Use float to store data
- swap(mat, tmp);
- }
-
- if (!mat.isContinuous())
- {
- mat = mat.clone();
- }
-
-}
-
int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts,
- const float length, const float width, const float height)
+ const float length, const float width, const float height)
{
CV_CheckGT(length, 0.0f, "Invalid length of grid");
CV_CheckGT(width, 0.0f, "Invalid width of grid");
@@ -73,7 +30,7 @@ int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts,
// Get input point cloud data
Mat ori_pts;
- _getMatFromInputArray(input_pts, ori_pts);
+ _getMatFromInputArray(input_pts, ori_pts, 0);
const int ori_pts_size = ori_pts.rows;
@@ -184,7 +141,8 @@ int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts,
return pts_new_size;
} // voxelGrid()
-void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sampled_pts_size, RNG *rng)
+void
+randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sampled_pts_size, RNG *rng)
{
CV_CheckGT(sampled_pts_size, 0, "The point cloud size after sampling must be greater than 0.");
CV_CheckDepth(sampled_pts.depth(), sampled_pts.isMat() || sampled_pts.depth() == CV_32F,
@@ -192,7 +150,7 @@ void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sam
// Get input point cloud data
Mat ori_pts;
- _getMatFromInputArray(input_pts, ori_pts);
+ _getMatFromInputArray(input_pts, ori_pts, 0);
const int ori_pts_size = ori_pts.rows;
CV_CheckLT(sampled_pts_size, ori_pts_size,
@@ -229,45 +187,51 @@ void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sam
} // randomSampling()
-void randomSampling(OutputArray sampled_pts, InputArray input_pts, const float sampled_scale, RNG *rng)
+void
+randomSampling(OutputArray sampled_pts, InputArray input_pts, const float sampled_scale, RNG *rng)
{
CV_CheckGT(sampled_scale, 0.0f, "The point cloud sampled scale must greater than 0.");
CV_CheckLT(sampled_scale, 1.0f, "The point cloud sampled scale must less than 1.");
Mat ori_pts;
- _getMatFromInputArray(input_pts, ori_pts);
+ _getMatFromInputArray(input_pts, ori_pts, 0);
randomSampling(sampled_pts, input_pts, cvCeil(sampled_scale * ori_pts.rows), rng);
} // randomSampling()
/**
- * FPS Algorithm:
- * Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit*
- * Initialize: Set sampled point cloud S to the empty set
- * Step:
- * 1. Randomly take a seed point from C and take it from C to S;
- * 2. Find a point in C that is the farthest away from S and take it from C to S;
- * (The distance from point to set S is the smallest distance from point to all points in S)
- * 3. Repeat *step 2* until the farthest distance of the point in C from S
- * is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*.
- * Output: Sampled point cloud S
+ * FPS Algorithm:\n
+ * Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit* \n
+ * Initialize: Set sampled point cloud S to the empty set \n
+ * Step: \n
+ * 1. Randomly take a seed point from C and take it from C to S; \n
+ * 2. Find a point in C that is the farthest away from S and take it from C to S; \n
+ * (The distance from point to set S is the smallest distance from point to all points in S) \n
+ * 3. Repeat *step 2* until the farthest distance of the point in C from S \n
+ * is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*. \n
+ * Output: Sampled point cloud S \n
*/
int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
- const int sampled_pts_size, const float dist_lower_limit, RNG *rng)
+ const int sampled_pts_size, const float dist_lower_limit, RNG *rng)
{
CV_CheckGT(sampled_pts_size, 0, "The point cloud size after sampling must be greater than 0.");
- CV_CheckGE(dist_lower_limit, 0.0f, "The distance lower bound must be greater than or equal to 0.");
+ CV_CheckGE(dist_lower_limit, 0.0f,
+ "The distance lower bound must be greater than or equal to 0.");
// Get input point cloud data
Mat ori_pts;
- _getMatFromInputArray(input_pts, ori_pts);
+ // In order to keep the points continuous in memory (which allows better support for SIMD),
+ // the position of the points may be changed, data copying is mandatory
+ _getMatFromInputArray(input_pts, ori_pts, 1, true);
- const int ori_pts_size = ori_pts.rows;
+ const int ori_pts_size = ori_pts.rows * ori_pts.cols / 3;
CV_CheckLT(sampled_pts_size, ori_pts_size,
"The sampled point cloud size must be smaller than the original point cloud size.");
// idx arr [ . . . . . . . . . ] --- sampling ---> [ . . . . . . . . . ]
// C S | C
+ // _idxs records the original location/id of the point
AutoBuffer _idxs(ori_pts_size);
+ // _dist_square records the distance from point(in C) to S
AutoBuffer _dist_square(ori_pts_size);
int *idxs = _idxs.data();
float *dist_square = _dist_square.data();
@@ -277,39 +241,85 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
dist_square[i] = FLT_MAX;
}
- // Randomly take a seed point from C and put it into S
- int seed = (int)((rng? rng->next(): theRNG().next()) % ori_pts_size);
+ // Randomly take a seed point from C and take it from C to S
+ int seed = (int) ((rng ? rng->next() : theRNG().next()) % ori_pts_size);
idxs[0] = seed;
idxs[seed] = 0;
- std::vector _sampled_point_flags(ori_pts_size, 0);
- _sampled_point_flags[seed] = 1;
+ // Pointer (base address) of access point data x,y,z
+ float *const ori_pts_ptr_x = (float *) ori_pts.data;
+ float *const ori_pts_ptr_y = ori_pts_ptr_x + ori_pts_size;
+ float *const ori_pts_ptr_z = ori_pts_ptr_y + ori_pts_size;
+
+ // Ensure that the point(in C) data x,y,z is continuous in the memory respectively
+ _swap(ori_pts_ptr_x[seed], ori_pts_ptr_x[0]);
+ _swap(ori_pts_ptr_y[seed], ori_pts_ptr_y[0]);
+ _swap(ori_pts_ptr_z[seed], ori_pts_ptr_z[0]);
- float *const ori_pts_ptr = (float *) ori_pts.data;
int sampled_cnt = 1;
const float dist_lower_limit_square = dist_lower_limit * dist_lower_limit;
while (sampled_cnt < sampled_pts_size)
{
int last_pt = sampled_cnt - 1;
- float *const last_pt_ptr_base = ori_pts_ptr + 3 * idxs[last_pt];
- float last_pt_x = last_pt_ptr_base[0], last_pt_y = last_pt_ptr_base[1], last_pt_z = last_pt_ptr_base[2];
+ float last_pt_x = ori_pts_ptr_x[last_pt];
+ float last_pt_y = ori_pts_ptr_y[last_pt];
+ float last_pt_z = ori_pts_ptr_z[last_pt];
// Calculate the distance from point in C to set S
float max_dist_square = 0;
- for (int i = sampled_cnt; i < ori_pts_size; ++i)
+ int next_pt = sampled_cnt;
+ int i = sampled_cnt;
+#ifdef CV_SIMD
+ int k = (ori_pts_size - sampled_cnt) / v_float32::nlanes;
+ int end = sampled_cnt + v_float32::nlanes * k;
+
+ v_float32 v_last_p_x = vx_setall_f32(last_pt_x);
+ v_float32 v_last_p_y = vx_setall_f32(last_pt_y);
+ v_float32 v_last_p_z = vx_setall_f32(last_pt_z);
+
+ for (; i < end; i += v_float32::nlanes)
{
- float *const ori_pts_ptr_base = ori_pts_ptr + 3 * idxs[i];
- float x_diff = (last_pt_x - ori_pts_ptr_base[0]);
- float y_diff = (last_pt_y - ori_pts_ptr_base[1]);
- float z_diff = (last_pt_z - ori_pts_ptr_base[2]);
- float next_dist_square = x_diff * x_diff + y_diff * y_diff + z_diff * z_diff;
- if (next_dist_square < dist_square[i])
+ v_float32 vx_diff = v_last_p_x - vx_load(ori_pts_ptr_x + i);
+ v_float32 vy_diff = v_last_p_y - vx_load(ori_pts_ptr_y + i);
+ v_float32 vz_diff = v_last_p_z - vx_load(ori_pts_ptr_z + i);
+
+ v_float32 v_next_dist_square =
+ vx_diff * vx_diff + vy_diff * vy_diff + vz_diff * vz_diff;
+
+ // Update the distance from the points(in C) to S
+ float *dist_square_ptr = dist_square + i;
+ v_float32 v_dist_square = vx_load(dist_square_ptr);
+ v_dist_square = v_min(v_dist_square, v_next_dist_square);
+ vx_store(dist_square_ptr, v_dist_square);
+
+ // Find a point in C that is the farthest away from S and take it from C to S
+ if (v_check_any(v_dist_square > vx_setall_f32(max_dist_square)))
{
- dist_square[i] = next_dist_square;
+ for (int m = 0; m < v_float32::nlanes; ++m)
+ {
+ if (dist_square_ptr[m] > max_dist_square)
+ {
+ next_pt = i + m;
+ max_dist_square = dist_square_ptr[m];
+ }
+ }
}
+
+ }
+
+#endif
+ for (; i < ori_pts_size; ++i)
+ {
+ float x_diff = (last_pt_x - ori_pts_ptr_x[i]);
+ float y_diff = (last_pt_y - ori_pts_ptr_y[i]);
+ float z_diff = (last_pt_z - ori_pts_ptr_z[i]);
+ float next_dist_square = x_diff * x_diff + y_diff * y_diff + z_diff * z_diff;
+ // Update the distance from the points(in C) to S
+ dist_square[i] = std::min(dist_square[i], next_dist_square);
+ // Find a point in C that is the farthest away from S and take it from C to S
if (dist_square[i] > max_dist_square)
{
- last_pt = i;
+ next_pt = i;
max_dist_square = dist_square[i];
}
}
@@ -318,26 +328,40 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
if (max_dist_square < dist_lower_limit_square)
break;
- _sampled_point_flags[idxs[last_pt]] = 1;
- _swap(idxs[sampled_cnt], idxs[last_pt]);
- _swap(dist_square[sampled_cnt], dist_square[last_pt]);
+ // Take point next_pt from C to S
+ _swap(idxs[next_pt], idxs[sampled_cnt]);
+ _swap(dist_square[next_pt], dist_square[sampled_cnt]);
+
+ // Ensure that the point(in C) data x,y,z is continuous in the memory respectively
+ _swap(ori_pts_ptr_x[next_pt], ori_pts_ptr_x[sampled_cnt]);
+ _swap(ori_pts_ptr_y[next_pt], ori_pts_ptr_y[sampled_cnt]);
+ _swap(ori_pts_ptr_z[next_pt], ori_pts_ptr_z[sampled_cnt]);
+
++sampled_cnt;
}
+
+ std::vector _sampled_point_flags(ori_pts_size, 0);
+ for (int j = 0; j < sampled_cnt; ++j)
+ {
+ _sampled_point_flags[idxs[j]] = 1;
+ }
+
Mat(_sampled_point_flags).copyTo(sampled_point_flags);
return sampled_cnt;
} // farthestPointSampling()
int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts,
- const float sampled_scale, const float dist_lower_limit, RNG *rng)
+ const float sampled_scale, const float dist_lower_limit, RNG *rng)
{
CV_CheckGT(sampled_scale, 0.0f, "The point cloud sampled scale must greater than 0.");
CV_CheckLT(sampled_scale, 1.0f, "The point cloud sampled scale must less than 1.");
- CV_CheckGE(dist_lower_limit, 0.0f, "The distance lower bound must be greater than or equal to 0.");
+ CV_CheckGE(dist_lower_limit, 0.0f,
+ "The distance lower bound must be greater than or equal to 0.");
Mat ori_pts;
- _getMatFromInputArray(input_pts, ori_pts);
+ _getMatFromInputArray(input_pts, ori_pts, 1);
return farthestPointSampling(sampled_point_flags, input_pts,
- cvCeil(sampled_scale * ori_pts.rows), dist_lower_limit, rng);
+ cvCeil(sampled_scale * ori_pts.cols), dist_lower_limit, rng);
} // farthestPointSampling()
//! @} _3d