From 4595e3aa3ee3c2d331e5a86e880c360e3a0f4605 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 12:55:38 +0400 Subject: [PATCH] switched to Input/Output Array in gpu::multiply --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 6 +- modules/gpuarithm/src/element_operations.cpp | 348 ++++++++---------- .../test/test_element_operations.cpp | 87 +++++ 3 files changed, 252 insertions(+), 189 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index ea3593bdcf..2b367fa1bf 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -57,10 +57,8 @@ CV_EXPORTS void add(InputArray src1, InputArray src2, OutputArray dst, InputArra //! subtracts one matrix from another (dst = src1 - src2) CV_EXPORTS void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted product of the two arrays (c = scale * a * b) -CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! weighted multiplies matrix to a scalar (c = scale * a * s) -CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); +//! computes element-wise weighted product of the two arrays (dst = scale * src1 * src2) +CV_EXPORTS void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); //! computes element-wise weighted quotient of the two arrays (c = a / b) CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 400968453a..76bfb9ba33 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -51,8 +51,7 @@ void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } +void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } @@ -880,127 +879,92 @@ namespace arithm void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } -void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s) +static void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { - using namespace arithm; - - cudaStream_t stream = StreamAccessor::getStream(s); - - if (src1.type() == CV_8UC4 && src2.type() == CV_32FC1) - { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - mulMat_8uc4_32f(src1, src2, dst, stream); - } - else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1) - { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - mulMat_16sc4_32f(src1, src2, dst, stream); - } - else + typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + static const func_t funcs[7][7] = { - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - static const func_t funcs[7][7] = { - { - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat - } - }; - - if (dtype < 0) - dtype = src1.depth(); - - const int sdepth = src1.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); - const int cn = src1.channels(); - - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - - if (sdepth == CV_64F || ddepth == CV_64F) + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat } + }; - dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); + const int sdepth = src1.depth(); + const int ddepth = dst.depth(); + const int cn = src1.channels(); - PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); - PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); - PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); + cudaStream_t stream = StreamAccessor::getStream(_stream); - const func_t func = funcs[sdepth][ddepth]; + PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); + PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); + PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + const func_t func = funcs[sdepth][ddepth]; - func(src1_, src2_, dst_, scale, stream); - } + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_, src2_, dst_, scale, stream); } namespace arithm @@ -1009,75 +973,73 @@ namespace arithm void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) +static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, Stream& _stream) { - using namespace arithm; - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7] = { { - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar } }; @@ -1093,32 +1055,16 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double {0 , 0, 0 , 0 } }; - if (dtype < 0) - dtype = src.depth(); - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn <= 4 ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const Scalar nsc(sc.val[0] * scale, sc.val[1] * scale, sc.val[2] * scale, sc.val[3] * scale); + cudaStream_t stream = StreamAccessor::getStream(_stream); const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; if (ddepth == sdepth && cn > 1 && npp_func != 0) { - npp_func(src, nsc, dst, stream); + npp_func(src, val, dst, stream); return; } @@ -1129,7 +1075,39 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src, nsc.val[0], dst, stream); + func(src, val[0], dst, stream); +} + +void cv::gpu::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream) +{ + if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) + { + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); + + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); + + arithm::mulMat_8uc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); + } + else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) + { + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); + + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); + + arithm::mulMat_16sc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); + } + else + { + arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, mulMat, mulScalar); + } } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index 73974d3ad4..ece38311e3 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -932,6 +932,93 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar, testing::Combine( DEPTH_PAIRS, WHOLE_SUBMAT)); +//////////////////////////////////////////////////////////////////////////////// +// Multiply_Scalar_First + +PARAM_TEST_CASE(Multiply_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + std::pair depth; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + depth = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Multiply_Scalar_First, WithOutScale) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::multiply(val, loadMat(mat), dst, 1, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + cv::gpu::multiply(val, loadMat(mat, useRoi), dst, 1, depth.second); + + cv::Mat dst_gold; + cv::multiply(val, mat, dst_gold, 1, depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, 1.0); + } +} + + +GPU_TEST_P(Multiply_Scalar_First, WithScale) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + double scale = randomDouble(0.0, 255.0); + + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::multiply(val, loadMat(mat), dst, scale, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + cv::gpu::multiply(val, loadMat(mat, useRoi), dst, scale, depth.second); + + cv::Mat dst_gold; + cv::multiply(val, mat, dst_gold, scale, depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, 1.0); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar_First, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + DEPTH_PAIRS, + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // Divide_Array