From 0cf0a5e9d4627c83ee4dfd162e6c1361103b3c29 Mon Sep 17 00:00:00 2001 From: Ruan <47767371+Ryyyc@users.noreply.github.com> Date: Tue, 30 Nov 2021 20:33:44 +0800 Subject: [PATCH] Merge pull request #21095 from No-Plane-Cannot-Be-Detected:next_SIMD Accelerated 3D point cloud Farthest Point Sampling calculation using SIMD. * Add several 3D point cloud sampling functions: Random, VoxelGrid, FarthestPoint. * Made some code detail changes and exposed the random number generator parameters at the interface. * Add simple tests for sampling. * Modify interface output parameters. * Modify interface return value. * The sampling test is modified for the new changes of function interface. * Improved test of VoxelGridFilterSampling * Improved test of VoxelGridFilterSampling and FPS. * Add test for the dist_lower_limit arguments of FPS function. * Optimization function _getMatFromInputArray. * Optimize the code style and some details according to the suggestions. * Clear prefix cv: in the source code. * Change the initialization of Mat in the sampling test. * 1. Unified code style 2. Optimize randomSampling method * 1. Optimize code comments. 2. Remove unused local variables. * Rebuild the structure of the test, make the test case more reliable, and change the code style. * Update test_sampling.cpp Fix a warning. * Use SIMD to optimize the farthest point sampling. * Optimize the farthest point sampling SIMD code. * 1. remove `\n` from the ptcloud.hpp comment. 2. updated the default value of the argument arrangement_of_points in the _getMatFromInputArray function in ptcloud_utils.hpp from 0 to 1, since the latter is more commonly used (such arrangement is easier for SIMD acceleration). 3. removed two functions in ptcloud_utils.hpp that were not used. * Remove the
in the comment. * Fix whitespace issues. --- modules/3d/include/opencv2/3d/ptcloud.hpp | 14 +- modules/3d/src/ptcloud/ptcloud_utils.hpp | 78 +++++++++ modules/3d/src/ptcloud/sampling.cpp | 198 ++++++++++++---------- 3 files changed, 196 insertions(+), 94 deletions(-) create mode 100644 modules/3d/src/ptcloud/ptcloud_utils.hpp diff --git a/modules/3d/include/opencv2/3d/ptcloud.hpp b/modules/3d/include/opencv2/3d/ptcloud.hpp index d1dec17ca1..110c5049ae 100644 --- a/modules/3d/include/opencv2/3d/ptcloud.hpp +++ b/modules/3d/include/opencv2/3d/ptcloud.hpp @@ -18,7 +18,7 @@ namespace cv { * point cloud data, in each voxel (i.e., 3D box), all the points present will be * approximated (i.e., downsampled) with the point closest to their centroid. * - * @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector or std::vector etc.) + * @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector or std::vector etc.) * sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected. * @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN. * @param length Grid length. @@ -63,17 +63,17 @@ CV_EXPORTS void randomSampling(OutputArray sampled_pts, InputArray input_pts, * @brief Point cloud sampling by Farthest Point Sampling(FPS). * * FPS Algorithm: - * Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit* - * Initialize: Set sampled point cloud S to the empty set - * Step: + * + Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit* + * + Initialize: Set sampled point cloud S to the empty set + * + Step: * 1. Randomly take a seed point from C and take it from C to S; * 2. Find a point in C that is the farthest away from S and take it from C to S; * (The distance from point to set S is the smallest distance from point to all points in S) * 3. Repeat *step 2* until the farthest distance of the point in C from S * is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*. - * Output: Sampled point cloud S + * + Output: Sampled point cloud S * - * @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector or std::vector etc.) + * @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector or std::vector etc.) * sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected. * @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN. * @param sampled_pts_size The desired point cloud size after sampling. @@ -89,7 +89,7 @@ CV_EXPORTS int farthestPointSampling(OutputArray sampled_point_flags, InputArray /** * @overload * - * @param sampled_point_flags (Output) Flags of the sampled point, (pass in std::vector or std::vector etc.) + * @param[out] sampled_point_flags Flags of the sampled point, (pass in std::vector or std::vector etc.) * sampled_point_flags[i] is 1 means i-th point selected, 0 means it is not selected. * @param input_pts Original point cloud, vector of Point3 or Mat of size Nx3/3xN. * @param sampled_scale Range (0, 1), the percentage of the sampled point cloud to the original size, diff --git a/modules/3d/src/ptcloud/ptcloud_utils.hpp b/modules/3d/src/ptcloud/ptcloud_utils.hpp new file mode 100644 index 0000000000..28cf05b4f2 --- /dev/null +++ b/modules/3d/src/ptcloud/ptcloud_utils.hpp @@ -0,0 +1,78 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + + +#ifndef OPENCV_3D_PTCLOUD_UTILS_HPP +#define OPENCV_3D_PTCLOUD_UTILS_HPP + +namespace cv { + +/** + * @brief Get cv::Mat with Nx3 or 3xN type CV_32FC1 from cv::InputArray. + * + * @param input_pts Point cloud xyz data. + * @param[out] mat Point cloud xyz data in cv::Mat with Nx3 or 3xN type CV_32FC1. + * @param arrangement_of_points The arrangement of point data in the matrix, + * 0 by row (Nx3, [x1, y1, z1, ..., xn, yn, zn]), + * 1 by column (3xN, [x1, ..., xn, y1, ..., yn, z1, ..., zn]). + * @param clone_data Flag to specify whether data cloning is mandatory. + * + * @note The following cases will clone data even if flag clone_data is false: + * 1. Data is discontinuous in memory + * 2. Data type is not float + * 3. The original arrangement of data is not the same as the expected new arrangement. + * For example, transforming from + * Nx3(x1, y1, z1, ..., xn, yn, zn) to 3xN(x1, ..., xn, y1, ..., yn, z1, ..., zn) + * + */ +inline void _getMatFromInputArray(InputArray input_pts, Mat &mat, + int arrangement_of_points = 1, bool clone_data = false) +{ + CV_Check(input_pts.dims(), input_pts.dims() < 3, + "Only support data with dimension less than 3."); + + // Guaranteed data can construct N×3 point clouds + int rows = input_pts.rows(), cols = input_pts.cols(), channels = input_pts.channels(); + size_t total = rows * cols * channels; + CV_Check(total, total % 3 == 0, + "total = input_pts.rows() * input_pts.cols() * input_pts.channels() must be an integer multiple of 3"); + + /** + Layout of point cloud data in memory space. + arrangement 0 : x1, y1, z1, ..., xn, yn, zn + For example, the input is std::vector, or std::vector, + or cv::Mat with type N×1 CV_32FC3 + arrangement 1 : x1, ..., xn, y1, ..., yn, z1, ..., zn + For example, the input is cv::Mat with type 3×N CV_32FC1 + */ + int ori_arrangement = (channels == 1 && rows == 3 && cols != 3) ? 1 : 0; + + // Convert to single channel without copying the data. + mat = ori_arrangement == 0 ? input_pts.getMat().reshape(1, (int) (total / 3)) + : input_pts.getMat(); + + if (ori_arrangement != arrangement_of_points) + { + Mat tmp; + transpose(mat, tmp); + swap(mat, tmp); + } + + if (mat.type() != CV_32F) + { + Mat tmp; + mat.convertTo(tmp, CV_32F); // Use float to store data + swap(mat, tmp); + } + + if (clone_data || (!mat.isContinuous())) + { + mat = mat.clone(); + } + +} + +} + +#endif //OPENCV_3D_PTCLOUD_UTILS_HPP diff --git a/modules/3d/src/ptcloud/sampling.cpp b/modules/3d/src/ptcloud/sampling.cpp index 5d0b13e58d..c4656221a7 100644 --- a/modules/3d/src/ptcloud/sampling.cpp +++ b/modules/3d/src/ptcloud/sampling.cpp @@ -5,6 +5,7 @@ #include "../precomp.hpp" #include "opencv2/3d/ptcloud.hpp" +#include "ptcloud_utils.hpp" #include namespace cv { @@ -20,52 +21,8 @@ static inline void _swap(Tp &n, Tp &m) m = tmp; } -/** - * Get cv::Mat with type N×3 CV_32FC1 from cv::InputArray - * Use different interpretations for the same memory data. - */ -static inline void _getMatFromInputArray(InputArray input_pts, Mat &mat) -{ - CV_Check(input_pts.dims(), input_pts.dims() < 3, - "Only support data with dimension less than 3."); - - // Guaranteed data can construct N×3 point clouds - int rows = input_pts.rows(), cols = input_pts.cols(), channels = input_pts.channels(); - size_t total = rows * cols * channels; - CV_Check(total, total % 3 == 0, - "total = input_pts.rows() * input_pts.cols() * input_pts.channels() must be an integer multiple of 3"); - - if (channels == 1 && rows == 3 && cols != 3) - { - // Layout of point cloud data in memory space: - // x1, ..., xn, y1, ..., yn, z1, ..., zn - // For example, the input is cv::Mat with type 3×N CV_32FC1 - transpose(input_pts, mat); - } - else - { - // Layout of point cloud data in memory space: - // x1, y1, z1, ..., xn, yn, zn - // For example, the input is std::vector, or std::vector, or cv::Mat with type N×1 CV_32FC3 - mat = input_pts.getMat().reshape(1, (int) (total / 3)); - } - - if (mat.type() != CV_32F) - { - Mat tmp; - mat.convertTo(tmp, CV_32F); // Use float to store data - swap(mat, tmp); - } - - if (!mat.isContinuous()) - { - mat = mat.clone(); - } - -} - int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts, - const float length, const float width, const float height) + const float length, const float width, const float height) { CV_CheckGT(length, 0.0f, "Invalid length of grid"); CV_CheckGT(width, 0.0f, "Invalid width of grid"); @@ -73,7 +30,7 @@ int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts, // Get input point cloud data Mat ori_pts; - _getMatFromInputArray(input_pts, ori_pts); + _getMatFromInputArray(input_pts, ori_pts, 0); const int ori_pts_size = ori_pts.rows; @@ -184,7 +141,8 @@ int voxelGridSampling(OutputArray sampled_point_flags, InputArray input_pts, return pts_new_size; } // voxelGrid() -void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sampled_pts_size, RNG *rng) +void +randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sampled_pts_size, RNG *rng) { CV_CheckGT(sampled_pts_size, 0, "The point cloud size after sampling must be greater than 0."); CV_CheckDepth(sampled_pts.depth(), sampled_pts.isMat() || sampled_pts.depth() == CV_32F, @@ -192,7 +150,7 @@ void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sam // Get input point cloud data Mat ori_pts; - _getMatFromInputArray(input_pts, ori_pts); + _getMatFromInputArray(input_pts, ori_pts, 0); const int ori_pts_size = ori_pts.rows; CV_CheckLT(sampled_pts_size, ori_pts_size, @@ -229,45 +187,51 @@ void randomSampling(OutputArray sampled_pts, InputArray input_pts, const int sam } // randomSampling() -void randomSampling(OutputArray sampled_pts, InputArray input_pts, const float sampled_scale, RNG *rng) +void +randomSampling(OutputArray sampled_pts, InputArray input_pts, const float sampled_scale, RNG *rng) { CV_CheckGT(sampled_scale, 0.0f, "The point cloud sampled scale must greater than 0."); CV_CheckLT(sampled_scale, 1.0f, "The point cloud sampled scale must less than 1."); Mat ori_pts; - _getMatFromInputArray(input_pts, ori_pts); + _getMatFromInputArray(input_pts, ori_pts, 0); randomSampling(sampled_pts, input_pts, cvCeil(sampled_scale * ori_pts.rows), rng); } // randomSampling() /** - * FPS Algorithm: - * Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit* - * Initialize: Set sampled point cloud S to the empty set - * Step: - * 1. Randomly take a seed point from C and take it from C to S; - * 2. Find a point in C that is the farthest away from S and take it from C to S; - * (The distance from point to set S is the smallest distance from point to all points in S) - * 3. Repeat *step 2* until the farthest distance of the point in C from S - * is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*. - * Output: Sampled point cloud S + * FPS Algorithm:\n + * Input: Point cloud *C*, *sampled_pts_size*, *dist_lower_limit* \n + * Initialize: Set sampled point cloud S to the empty set \n + * Step: \n + * 1. Randomly take a seed point from C and take it from C to S; \n + * 2. Find a point in C that is the farthest away from S and take it from C to S; \n + * (The distance from point to set S is the smallest distance from point to all points in S) \n + * 3. Repeat *step 2* until the farthest distance of the point in C from S \n + * is less than *dist_lower_limit*, or the size of S is equal to *sampled_pts_size*. \n + * Output: Sampled point cloud S \n */ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts, - const int sampled_pts_size, const float dist_lower_limit, RNG *rng) + const int sampled_pts_size, const float dist_lower_limit, RNG *rng) { CV_CheckGT(sampled_pts_size, 0, "The point cloud size after sampling must be greater than 0."); - CV_CheckGE(dist_lower_limit, 0.0f, "The distance lower bound must be greater than or equal to 0."); + CV_CheckGE(dist_lower_limit, 0.0f, + "The distance lower bound must be greater than or equal to 0."); // Get input point cloud data Mat ori_pts; - _getMatFromInputArray(input_pts, ori_pts); + // In order to keep the points continuous in memory (which allows better support for SIMD), + // the position of the points may be changed, data copying is mandatory + _getMatFromInputArray(input_pts, ori_pts, 1, true); - const int ori_pts_size = ori_pts.rows; + const int ori_pts_size = ori_pts.rows * ori_pts.cols / 3; CV_CheckLT(sampled_pts_size, ori_pts_size, "The sampled point cloud size must be smaller than the original point cloud size."); // idx arr [ . . . . . . . . . ] --- sampling ---> [ . . . . . . . . . ] // C S | C + // _idxs records the original location/id of the point AutoBuffer _idxs(ori_pts_size); + // _dist_square records the distance from point(in C) to S AutoBuffer _dist_square(ori_pts_size); int *idxs = _idxs.data(); float *dist_square = _dist_square.data(); @@ -277,39 +241,85 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts, dist_square[i] = FLT_MAX; } - // Randomly take a seed point from C and put it into S - int seed = (int)((rng? rng->next(): theRNG().next()) % ori_pts_size); + // Randomly take a seed point from C and take it from C to S + int seed = (int) ((rng ? rng->next() : theRNG().next()) % ori_pts_size); idxs[0] = seed; idxs[seed] = 0; - std::vector _sampled_point_flags(ori_pts_size, 0); - _sampled_point_flags[seed] = 1; + // Pointer (base address) of access point data x,y,z + float *const ori_pts_ptr_x = (float *) ori_pts.data; + float *const ori_pts_ptr_y = ori_pts_ptr_x + ori_pts_size; + float *const ori_pts_ptr_z = ori_pts_ptr_y + ori_pts_size; + + // Ensure that the point(in C) data x,y,z is continuous in the memory respectively + _swap(ori_pts_ptr_x[seed], ori_pts_ptr_x[0]); + _swap(ori_pts_ptr_y[seed], ori_pts_ptr_y[0]); + _swap(ori_pts_ptr_z[seed], ori_pts_ptr_z[0]); - float *const ori_pts_ptr = (float *) ori_pts.data; int sampled_cnt = 1; const float dist_lower_limit_square = dist_lower_limit * dist_lower_limit; while (sampled_cnt < sampled_pts_size) { int last_pt = sampled_cnt - 1; - float *const last_pt_ptr_base = ori_pts_ptr + 3 * idxs[last_pt]; - float last_pt_x = last_pt_ptr_base[0], last_pt_y = last_pt_ptr_base[1], last_pt_z = last_pt_ptr_base[2]; + float last_pt_x = ori_pts_ptr_x[last_pt]; + float last_pt_y = ori_pts_ptr_y[last_pt]; + float last_pt_z = ori_pts_ptr_z[last_pt]; // Calculate the distance from point in C to set S float max_dist_square = 0; - for (int i = sampled_cnt; i < ori_pts_size; ++i) + int next_pt = sampled_cnt; + int i = sampled_cnt; +#ifdef CV_SIMD + int k = (ori_pts_size - sampled_cnt) / v_float32::nlanes; + int end = sampled_cnt + v_float32::nlanes * k; + + v_float32 v_last_p_x = vx_setall_f32(last_pt_x); + v_float32 v_last_p_y = vx_setall_f32(last_pt_y); + v_float32 v_last_p_z = vx_setall_f32(last_pt_z); + + for (; i < end; i += v_float32::nlanes) { - float *const ori_pts_ptr_base = ori_pts_ptr + 3 * idxs[i]; - float x_diff = (last_pt_x - ori_pts_ptr_base[0]); - float y_diff = (last_pt_y - ori_pts_ptr_base[1]); - float z_diff = (last_pt_z - ori_pts_ptr_base[2]); - float next_dist_square = x_diff * x_diff + y_diff * y_diff + z_diff * z_diff; - if (next_dist_square < dist_square[i]) + v_float32 vx_diff = v_last_p_x - vx_load(ori_pts_ptr_x + i); + v_float32 vy_diff = v_last_p_y - vx_load(ori_pts_ptr_y + i); + v_float32 vz_diff = v_last_p_z - vx_load(ori_pts_ptr_z + i); + + v_float32 v_next_dist_square = + vx_diff * vx_diff + vy_diff * vy_diff + vz_diff * vz_diff; + + // Update the distance from the points(in C) to S + float *dist_square_ptr = dist_square + i; + v_float32 v_dist_square = vx_load(dist_square_ptr); + v_dist_square = v_min(v_dist_square, v_next_dist_square); + vx_store(dist_square_ptr, v_dist_square); + + // Find a point in C that is the farthest away from S and take it from C to S + if (v_check_any(v_dist_square > vx_setall_f32(max_dist_square))) { - dist_square[i] = next_dist_square; + for (int m = 0; m < v_float32::nlanes; ++m) + { + if (dist_square_ptr[m] > max_dist_square) + { + next_pt = i + m; + max_dist_square = dist_square_ptr[m]; + } + } } + + } + +#endif + for (; i < ori_pts_size; ++i) + { + float x_diff = (last_pt_x - ori_pts_ptr_x[i]); + float y_diff = (last_pt_y - ori_pts_ptr_y[i]); + float z_diff = (last_pt_z - ori_pts_ptr_z[i]); + float next_dist_square = x_diff * x_diff + y_diff * y_diff + z_diff * z_diff; + // Update the distance from the points(in C) to S + dist_square[i] = std::min(dist_square[i], next_dist_square); + // Find a point in C that is the farthest away from S and take it from C to S if (dist_square[i] > max_dist_square) { - last_pt = i; + next_pt = i; max_dist_square = dist_square[i]; } } @@ -318,26 +328,40 @@ int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts, if (max_dist_square < dist_lower_limit_square) break; - _sampled_point_flags[idxs[last_pt]] = 1; - _swap(idxs[sampled_cnt], idxs[last_pt]); - _swap(dist_square[sampled_cnt], dist_square[last_pt]); + // Take point next_pt from C to S + _swap(idxs[next_pt], idxs[sampled_cnt]); + _swap(dist_square[next_pt], dist_square[sampled_cnt]); + + // Ensure that the point(in C) data x,y,z is continuous in the memory respectively + _swap(ori_pts_ptr_x[next_pt], ori_pts_ptr_x[sampled_cnt]); + _swap(ori_pts_ptr_y[next_pt], ori_pts_ptr_y[sampled_cnt]); + _swap(ori_pts_ptr_z[next_pt], ori_pts_ptr_z[sampled_cnt]); + ++sampled_cnt; } + + std::vector _sampled_point_flags(ori_pts_size, 0); + for (int j = 0; j < sampled_cnt; ++j) + { + _sampled_point_flags[idxs[j]] = 1; + } + Mat(_sampled_point_flags).copyTo(sampled_point_flags); return sampled_cnt; } // farthestPointSampling() int farthestPointSampling(OutputArray sampled_point_flags, InputArray input_pts, - const float sampled_scale, const float dist_lower_limit, RNG *rng) + const float sampled_scale, const float dist_lower_limit, RNG *rng) { CV_CheckGT(sampled_scale, 0.0f, "The point cloud sampled scale must greater than 0."); CV_CheckLT(sampled_scale, 1.0f, "The point cloud sampled scale must less than 1."); - CV_CheckGE(dist_lower_limit, 0.0f, "The distance lower bound must be greater than or equal to 0."); + CV_CheckGE(dist_lower_limit, 0.0f, + "The distance lower bound must be greater than or equal to 0."); Mat ori_pts; - _getMatFromInputArray(input_pts, ori_pts); + _getMatFromInputArray(input_pts, ori_pts, 1); return farthestPointSampling(sampled_point_flags, input_pts, - cvCeil(sampled_scale * ori_pts.rows), dist_lower_limit, rng); + cvCeil(sampled_scale * ori_pts.cols), dist_lower_limit, rng); } // farthestPointSampling() //! @} _3d