From a308dfca9856574d37abe7628b965e29861fb105 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Wed, 30 Aug 2023 14:53:59 +0800 Subject: [PATCH] core: add broadcast (#23965) * add broadcast_to with tests * change name * fix test * fix implicit type conversion * replace type of shape with InputArray * add perf test * add perf tests which takes care of axis * v2 from ficus expand * rename to broadcast * use randu in place of declare * doc improvement; smaller scale in perf * capture get_index by reference --- modules/core/include/opencv2/core.hpp | 7 + modules/core/perf/perf_arithm.cpp | 27 ++++ modules/core/src/matrix_transform.cpp | 218 ++++++++++++++++++++++++++ modules/core/test/test_arithm.cpp | 133 ++++++++++++++++ 4 files changed, 385 insertions(+) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index d9a21701f2..7b5108fcc4 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1118,6 +1118,13 @@ CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode); */ CV_EXPORTS_W void flipND(InputArray src, OutputArray dst, int axis); +/** @brief Broadcast the given Mat to the given shape. + * @param src input array + * @param shape target shape. Should be a list of CV_32S numbers. Note that negative values are not supported. + * @param dst output array that has the given shape + */ +CV_EXPORTS_W void broadcast(InputArray src, InputArray shape, OutputArray dst); + enum RotateFlags { ROTATE_90_CLOCKWISE = 0, //!, perf::MatType, std::vector>>; typedef Size_MatType BinaryOpTest; +PERF_TEST_P_(BroadcastTest, basic) +{ + std::vector shape_src = get<0>(GetParam()); + int dt_type = get<1>(GetParam()); + std::vector shape_dst = get<2>(GetParam()); + + cv::Mat src(static_cast(shape_src.size()), shape_src.data(), dt_type); + cv::Mat dst(static_cast(shape_dst.size()), shape_dst.data(), dt_type); + + cv::randu(src, -1.f, 1.f); + + TEST_CYCLE() cv::broadcast(src, shape_dst, dst); + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/ , BroadcastTest, + testing::Combine( + testing::Values(std::vector{1, 100, 800}, + std::vector{10, 1, 800}, + std::vector{10, 100, 1}), + testing::Values(CV_32FC1), + testing::Values(std::vector{10, 100, 800}) + ) +); + PERF_TEST_P_(BinaryOpTest, min) { Size sz = get<0>(GetParam()); diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 744ee69b0d..43bf9be057 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -7,6 +7,7 @@ #include "opencv2/core/detail/dispatch_helper.impl.hpp" #include // std::swap_ranges +#include // std::accumulate namespace cv { @@ -857,6 +858,223 @@ void flipND(InputArray _src, OutputArray _dst, int _axis) flipNDImpl(dst.ptr(), dst.size.p, dst.step.p, axis); } +/* + This function first prepends 1 to each tensor shape to have a common max_ndims dimension, then flatten non-broadcast dimensions. +*/ +static bool _flatten_for_broadcast(int narrays, int max_ndims, const int* ndims, const int** orig_shape, + int** flatten_shape, size_t** flatten_step) { + int i, j, k; + + // step 1. + // * make all inputs and the output max_ndims-dimensional. + // * compute proper step's + for (i = max_ndims - 1; i >= 0; i-- ) { + for (k = 0; k < narrays; k++) { + j = ndims[k] - (max_ndims - i); + int sz_i = j >= 0 ? orig_shape[k][j] : 1; + size_t st_i = i == max_ndims - 1 ? 1 : flatten_step[k][i+1] * flatten_shape[k][i+1]; + flatten_shape[k][i] = sz_i; + flatten_step[k][i] = st_i; + if (flatten_shape[k][i] == 0) + return false; + } + } + + // step 2. Let's do the flattening first, + // since we'd need proper values of steps to check continuity. + // this loop is probably the most tricky part + // in the whole implementation of broadcasting. + j = max_ndims-1; + for (i = j - 1; i >= 0; i--) { + bool all_contiguous = true, all_scalars = true, all_consistent = true; + for(k = 0; k < narrays; k++) { + size_t st = flatten_step[k][j] * flatten_shape[k][j]; + bool prev_scalar = flatten_shape[k][j] == 1; + bool scalar = flatten_shape[k][i] == 1; + all_contiguous = all_contiguous && (st == flatten_step[k][i]); + all_scalars = all_scalars && scalar; + all_consistent = all_consistent && (scalar == prev_scalar); + } + if (all_contiguous && (all_consistent || all_scalars)) { + for(k = 0; k < narrays; k++) + flatten_shape[k][j] *= flatten_shape[k][i]; + } else { + j--; + if (i < j) { + for(k = 0; k < narrays; k++) { + flatten_shape[k][j] = flatten_shape[k][i]; + flatten_step[k][j] = flatten_step[k][i]; + } + } + } + } + + // step 3. Set some step's to 0's. + for (i = max_ndims-1; i >= j; i--) { + for (k = 0; k < narrays; k++) + flatten_step[k][i] = flatten_shape[k][i] == 1 ? 0 : flatten_step[k][i]; + } + for (; i >= 0; i--) { + for (k = 0; k < narrays; k++) { + flatten_step[k][i] = 0; + flatten_shape[k][i] = 1; + } + } + return true; +} + +void broadcast(InputArray _src, InputArray _shape, OutputArray _dst) { + CV_INSTRUMENT_REGION(); + + Mat src = _src.getMat(); + CV_CheckTrue(src.isContinuous(), "broadcast: input array must be contiguous"); + CV_CheckChannelsEQ(src.channels(), 1, "broadcast: input array must be single channel"); + + Mat shape = _shape.getMat(); + CV_CheckTypeEQ(shape.type(), CV_32S, "broadcast: target shape must be of type int32"); + const auto dims_shape = static_cast(shape.total()); + const auto *ptr_shape = shape.ptr(); + + // check valid shape, 1D/0D Mat would fail in the following checks + const auto dims_src = src.dims; + CV_CheckLE(dims_src, dims_shape, + "broadcast: dimension of input array must be less than or equal to dimension of target shape"); + std::vector shape_src{src.size.p, src.size.p + dims_src}; + if (shape_src.size() < static_cast(dims_shape)) { + shape_src.insert(shape_src.begin(), dims_shape - shape_src.size(), 1); + } + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + const auto *shape_target = ptr_shape; + if (shape_src[i] != 1) { + CV_CheckEQ(shape_src[i], shape_target[i], "target shape must be equal to input shape or 1"); + } + } + + // impl + _dst.create(dims_shape, shape.ptr(), src.type()); + Mat dst = _dst.getMat(); + std::vector is_same_shape(dims_shape, 0); + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + if (shape_src[i] == ptr_shape[i]) { + is_same_shape[i] = 1; + } + } + // copy if same shape + if (std::accumulate(is_same_shape.begin(), is_same_shape.end(), 1, std::multiplies()) != 0) { + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst, p_src, dst.total() * dst.elemSize()); + return; + } + // other cases + int max_ndims = std::max(dims_src, dims_shape); + const int all_ndims[2] = {src.dims, dst.dims}; + const int* orig_shapes[2] = {src.size.p, dst.size.p}; + cv::AutoBuffer buff(max_ndims * 4); + int* flatten_shapes[2] = {(int*)buff.data(), (int*)(buff.data() + max_ndims)}; + size_t* flatten_steps[2] = {(size_t*)(buff.data() + 2 * max_ndims), (size_t*)(buff.data() + 3 * max_ndims)}; + if (_flatten_for_broadcast(2, max_ndims, all_ndims, orig_shapes, flatten_shapes, flatten_steps)) { + size_t src_dp = flatten_steps[0][max_ndims - 1]; + size_t dst_dp = flatten_steps[1][max_ndims - 1]; + CV_Assert(dst_dp == 1); + CV_Assert(max_ndims >= 2); // >= 3? + size_t rowstep_src = flatten_steps[0][max_ndims - 2]; + size_t rowstep_dst = flatten_steps[1][max_ndims - 2]; + const char* ptr_src = src.ptr(); + char* ptr_dst = dst.ptr(); + size_t esz = src.elemSize(); + int nrows = flatten_shapes[1][max_ndims - 2]; + int ncols = flatten_shapes[1][max_ndims - 1]; + int nplanes = 1; + CV_Check(esz, esz == 1 || esz == 2 || esz == 4 || esz == 8, "broadcast: not supported data type"); + + for (int k = 0; k < max_ndims - 2; k++) { + nplanes *= flatten_shapes[1][k]; + } + for (int plane_idx = 0; plane_idx < nplanes; plane_idx++) { + size_t offset_src = 0, offset_dst = 0; + size_t idx = (size_t)plane_idx; + for (int k = max_ndims - 3; k >= 0; k--) { + size_t prev_idx = idx / flatten_shapes[1][k]; + size_t i_k = (int)(idx - prev_idx * flatten_shapes[1][k]); + offset_src += i_k * flatten_steps[0][k]; + offset_dst += i_k * flatten_steps[1][k]; + idx = prev_idx; + } + + #define OPENCV_CORE_BROADCAST_LOOP(_Tp) \ + for (int i = 0; i < nrows; i++) { \ + const _Tp *ptr_src_ = (const _Tp*)ptr_src + offset_src + rowstep_src * i; \ + _Tp *ptr_dst_ = (_Tp*)ptr_dst + offset_dst + rowstep_dst * i; \ + if (src_dp == 1) { \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = ptr_src_[j]; \ + } \ + } else { \ + _Tp x = *ptr_src_; \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = x; \ + } \ + } \ + } + + if (esz == 1) { + OPENCV_CORE_BROADCAST_LOOP(int8_t); + } else if (esz == 2) { + OPENCV_CORE_BROADCAST_LOOP(int16_t); + } else if (esz == 4) { + OPENCV_CORE_BROADCAST_LOOP(int32_t); + } else if (esz == 8) { + OPENCV_CORE_BROADCAST_LOOP(int64_t); + } else { + CV_Error(cv::Error::StsNotImplemented, ""); + } + #undef OPENCV_CORE_BROADCAST_LOOP + } + } else { + // initial copy (src to dst) + std::vector step_src{src.step.p, src.step.p + dims_src}; + if (step_src.size() < static_cast(dims_shape)) { + step_src.insert(step_src.begin(), dims_shape - step_src.size(), step_src[0]); + } + for (size_t i = 0; i < src.total(); ++i) { + size_t t = i; + size_t src_offset = 0, dst_offset = 0; + for (int j = static_cast(shape_src.size() - 1); j >= 0; --j) { + size_t idx = t / shape_src[j]; + size_t offset = static_cast(t - idx * shape_src[j]); + src_offset += offset * step_src[j]; + dst_offset += offset * dst.step[j]; + t = idx; + } + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst + dst_offset, p_src + src_offset, dst.elemSize()); + } + // broadcast copy (dst inplace) + std::vector cumulative_shape(dims_shape, 1); + int total = static_cast(dst.total()); + for (int i = dims_shape - 1; i >= 0; --i) { + cumulative_shape[i] = static_cast(total / ptr_shape[i]); + total = cumulative_shape[i]; + } + for (int i = dims_shape - 1; i >= 0; --i) { + if (is_same_shape[i] == 1) { + continue; + } + auto step = dst.step[i]; + auto *p_dst = dst.ptr(); + for (int j = 0; j < cumulative_shape[i]; j++) { + for (int k = 0; k < ptr_shape[i] - 1; k++) { + std::memcpy(p_dst + step, p_dst, step); + p_dst += step; + } + p_dst += step; + } + } + } +} + void rotate(InputArray _src, OutputArray _dst, int rotateMode) { CV_Assert(_src.dims() <= 2); diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index ea9cda56be..848a2e8b6a 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2268,6 +2268,139 @@ INSTANTIATE_TEST_CASE_P(Arithm, FlipND, testing::Combine( testing::Values(perf::MatType(CV_8UC1), CV_32FC1) )); +TEST(BroadcastTo, basic) { + std::vector shape_src{2, 1}; + std::vector data_src{1, 2}; + Mat src(static_cast(shape_src.size()), shape_src.data(), CV_32SC1, data_src.data()); + + auto get_index = [](const std::vector& shape, size_t cnt) { + std::vector index(shape.size()); + size_t t = cnt; + for (int i = static_cast(shape.size() - 1); i >= 0; --i) { + size_t idx = t / shape[i]; + index[i] = static_cast(t - idx * shape[i]); + t = idx; + } + return index; + }; + + auto fn_verify = [&get_index](const Mat& ref, const Mat& res) { + // check type + EXPECT_EQ(ref.type(), res.type()); + // check shape + EXPECT_EQ(ref.dims, res.dims); + for (int i = 0; i < ref.dims; ++i) { + EXPECT_EQ(ref.size[i], res.size[i]); + } + // check value + std::vector shape{ref.size.p, ref.size.p + ref.dims}; + for (size_t i = 0; i < ref.total(); ++i) { + auto index = get_index(shape, i); + switch (ref.type()) { + case CV_32SC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_8UC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_32FC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + default: FAIL() << "Unsupported type: " << ref.type(); + } + } + }; + + { + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), src.type(), data_ref.data()); + Mat dst; + broadcast(src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_8U); + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_32F); + std::vector shape{1, 1, 2, 1}; // {2, 1} + std::vector data_ref{ + 1.f, // [0, 0, 0, 0] + 2.f, // [0, 0, 1, 0] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + std::vector _shape_src{2, 3, 4}; + std::vector _data_src{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 2, :] + }; + Mat _src(static_cast(_shape_src.size()), _shape_src.data(), CV_32FC1, _data_src.data()); + + std::vector shape{2, 1, 2, 3, 4}; + std::vector data_ref{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [0, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [0, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [0, 0, 1, 2, :] + + 1.f, 2.f, 3.f, 4.f, // [1, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [1, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [1, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 0, 1, 2, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } +} + TEST(Core_minMaxIdx, regression_9207_2) { const int rows = 13;