fixed several bugs in gpu arithm functions

refactored tests for them
13 years ago · 844bdea5ac
parent f58c40bfab
commit 844bdea5ac
5 changed files with 1601 additions and 780 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -638,11 +638,11 @@ CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, S
 //! pixel by pixel right shift of an image by a constant value
 //! supports 1, 3 and 4 channels images with integers elements
-CV_EXPORTS void rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+CV_EXPORTS void rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
 //! pixel by pixel left shift of an image by a constant value
 //! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
-CV_EXPORTS void lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
+CV_EXPORTS void lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
 //! computes per-element minimum of two arrays (dst = min(src1, src2))
 CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@ -706,8 +706,8 @@ namespace cv { namespace gpu { namespace device
    {
        __device__ __forceinline__ short4 operator ()(short4 a, float b) const
        {
-            return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<uchar>(a.y / b),
+            return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b),
-                                        saturate_cast<short>(a.z / b), saturate_cast<uchar>(a.w / b))
+                                        saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b))
                          : make_short4(0,0,0,0);
        }
    };
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@ -71,8 +71,8 @@ void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&,
 void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::rshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::rshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::lshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
+void cv::gpu::lshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
@ -462,15 +462,14 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
        {0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}
    };
-    static const func_t npp_funcs[7] = 
+    static const func_t npp_funcs[6] =
    {
        NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call,
        0,
        NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call,
        NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call,
        NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call,
-        NppArithm<CV_32F, nppiSub_32f_C1R>::call,
+        NppArithm<CV_32F, nppiSub_32f_C1R>::call
-        subtract_gpu<double, double>
    };
    CV_Assert(src1.type() != CV_8S);
@ -484,7 +483,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
    cudaStream_t stream = StreamAccessor::getStream(s);
-    if (mask.empty() && dst.type() == src1.type())
+    if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)
    {
        npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream);
        return;
@ -734,15 +733,14 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
        {0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}
    };
-    static const func_t npp_funcs[7] = 
+    static const func_t npp_funcs[6] =
    {
        NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call,
        0,
        NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,
        NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,
        NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,
-        NppArithm<CV_32F, nppiDiv_32f_C1R>::call,
+        NppArithm<CV_32F, nppiDiv_32f_C1R>::call
-        divide_gpu<double, double>
    };
    cudaStream_t stream = StreamAccessor::getStream(s);
@ -753,7 +751,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
        dst.create(src1.size(), src1.type());
-        multiply_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
+        divide_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
    }
    else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
    {
@ -761,7 +759,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
        dst.create(src1.size(), src1.type());
-        multiply_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
+        divide_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
    }
    else
    {
@ -773,7 +771,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
        dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
-        if (scale == 1 && dst.type() == src1.type())
+        if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
        {
            npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream);
            return;
@ -1729,7 +1727,7 @@ namespace
    };
 }
-void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
+void cv::gpu::rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
 {
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    static const func_t funcs[5][4] =
@ -1749,7 +1747,7 @@ void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
    funcs[src.depth()][src.channels() - 1](src, sc, dst, StreamAccessor::getStream(stream));
 }
-void cv::gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
+void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
 {
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    static const func_t funcs[5][4] =
--- a/modules/gpu/test/test_arithm.cpp
+++ b/modules/gpu/test/test_arithm.cpp
--- a/modules/gpu/test/utility.hpp
+++ b/modules/gpu/test/utility.hpp
@ -162,10 +162,37 @@ CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX
 #define ALL_DEVICES testing::ValuesIn(devices())
 #define DEVICES(feature) testing::ValuesIn(devices(feature))
 #define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
 #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F))
 #define ALL_TYPES testing::ValuesIn(all_types())
 #define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
-#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
+#define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)),   \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)),  \
                                                                                        \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F)))
 #define WHOLE testing::Values(UseRoi(false))
 #define SUBMAT testing::Values(UseRoi(true))
@ -173,4 +200,6 @@ CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX
 #define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
 #define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
 #endif // __OPENCV_TEST_UTILITY_HPP__