element operations

12 years ago · 11c6eb6305
parent f00efcfc59
commit 11c6eb6305
4 changed files with 4900 additions and 2806 deletions
--- a/modules/gpu/include/opencv2/gpu/device/functional.hpp
+++ b/modules/gpu/include/opencv2/gpu/device/functional.hpp
@ -357,6 +357,9 @@ namespace cv { namespace gpu { namespace device
        {
            return abs(x);
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
    {
@ -364,6 +367,9 @@ namespace cv { namespace gpu { namespace device
        {
            return x;
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<signed char> : unary_function<signed char, signed char>
    {
@ -371,6 +377,9 @@ namespace cv { namespace gpu { namespace device
        {
            return ::abs(x);
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<char> : unary_function<char, char>
    {
@ -378,6 +387,9 @@ namespace cv { namespace gpu { namespace device
        {
            return ::abs(x);
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
    {
@ -385,6 +397,9 @@ namespace cv { namespace gpu { namespace device
        {
            return x;
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<short> : unary_function<short, short>
    {
@ -392,6 +407,9 @@ namespace cv { namespace gpu { namespace device
        {
            return ::abs(x);
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
    {
@ -399,6 +417,9 @@ namespace cv { namespace gpu { namespace device
        {
            return x;
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<int> : unary_function<int, int>
    {
@ -406,6 +427,9 @@ namespace cv { namespace gpu { namespace device
        {
            return ::abs(x);
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<float> : unary_function<float, float>
    {
@ -413,6 +437,9 @@ namespace cv { namespace gpu { namespace device
        {
            return ::fabsf(x);
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };
    template <> struct abs_func<double> : unary_function<double, double>
    {
@ -420,6 +447,9 @@ namespace cv { namespace gpu { namespace device
        {
            return ::fabs(x);
        }
+
+        __device__ __forceinline__ abs_func() {}
+        __device__ __forceinline__ abs_func(const abs_func&) {}
    };

 #define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \
@ -429,6 +459,8 @@ namespace cv { namespace gpu { namespace device
        { \
            return func ## f(v); \
        } \
+        __device__ __forceinline__ name ## _func() {} \
+        __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    }; \
    template <> struct name ## _func<double> : unary_function<double, double> \
    { \
@ -436,6 +468,8 @@ namespace cv { namespace gpu { namespace device
        { \
            return func(v); \
        } \
+        __device__ __forceinline__ name ## _func() {} \
+        __device__ __forceinline__ name ## _func(const name ## _func&) {} \
    };

 #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
@ -210,7 +210,6 @@ TEST_P(Add_Array, Accuracy)
 {
    cv::Mat mat1 = randomMat(size, stype);
    cv::Mat mat2 = randomMat(size, stype);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);

    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
@ -228,10 +227,10 @@ TEST_P(Add_Array, Accuracy)
    {
        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);
+        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);

        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
-        cv::add(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);
+        cv::add(mat1, mat2, dst_gold, cv::noArray(), depth.second);

        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
    }
@ -244,6 +243,67 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array, testing::Combine(
    ALL_CHANNELS,
    WHOLE_SUBMAT));

+// Fixture for the masked cv::gpu::add test. Parameters: device, matrix size,
+// a (source depth, destination depth) pair and the UseRoi flag.
+// Both matrix types are built with a single channel.
+PARAM_TEST_CASE(Add_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    // Raw test parameters, assigned in SetUp().
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth; // first: source depth, second: destination depth
+    bool useRoi;
+
+    // Single-channel matrix types derived from `depth`.
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        // Select the device under test.
+        devInfo = GET_PARAM(0);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(3);
+
+        // Source/destination types: requested depth, one channel.
+        depth = GET_PARAM(2);
+        stype = CV_MAKE_TYPE(depth.first, 1);
+        dtype = CV_MAKE_TYPE(depth.second, 1);
+    }
+};
+
+// Checks cv::gpu::add with a mask against cv::add on the same random inputs.
+TEST_P(Add_Array_Mask, Accuracy)
+{
+    cv::Mat mat1 = randomMat(size, stype);
+    cv::Mat mat2 = randomMat(size, stype);
+    // Bounds spelled as doubles, matching the Subtract_Array_Mask test.
+    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
+
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        // A CV_64F depth was requested on a device without NATIVE_DOUBLE:
+        // any cv::Exception raised must carry CV_StsUnsupportedFormat.
+        // NOTE(review): this branch calls add with an empty mask and does not
+        // fail when nothing is thrown - confirm that this is intended.
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
+        }
+    }
+    else
+    {
+        // GPU result; the destination is zero-filled before the masked add.
+        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        dst.setTo(cv::Scalar::all(0));
+        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
+
+        // CPU reference, also starting from a zero-filled destination.
+        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
+        cv::add(mat1, mat2, dst_gold, mask, depth.second);
+
+        // Tolerance is 1e-4 when either depth is CV_32F or above, exact match otherwise.
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+    }
+}
+
+// Instantiates Add_Array_Mask under the GPU_Core prefix for every combination
+// of the four generators below (one per fixture parameter, in the same order
+// as the PARAM_TEST_CASE type list).
+INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array_Mask, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
 ////////////////////////////////////////////////////////////////////////////////
 // Add_Scalar

@ -365,7 +425,6 @@ TEST_P(Subtract_Array, Accuracy)
 {
    cv::Mat mat1 = randomMat(size, stype);
    cv::Mat mat2 = randomMat(size, stype);
-    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);

    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
@ -383,10 +442,10 @@ TEST_P(Subtract_Array, Accuracy)
    {
        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
        dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);
+        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);

        cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
-        cv::subtract(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);
+        cv::subtract(mat1, mat2, dst_gold, cv::noArray(), depth.second);

        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
    }
@ -399,6 +458,67 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array, testing::Combine(
    ALL_CHANNELS,
    WHOLE_SUBMAT));

+// Fixture for the masked cv::gpu::subtract test. Parameters: device, matrix
+// size, a (source depth, destination depth) pair and the UseRoi flag.
+// Both matrix types are built with a single channel.
+PARAM_TEST_CASE(Subtract_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+{
+    // Raw test parameters, assigned in SetUp().
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size size;
+    std::pair<MatDepth, MatDepth> depth; // first: source depth, second: destination depth
+    bool useRoi;
+
+    // Single-channel matrix types derived from `depth`.
+    int stype;
+    int dtype;
+
+    virtual void SetUp()
+    {
+        // Select the device under test.
+        devInfo = GET_PARAM(0);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(3);
+
+        // Source/destination types: requested depth, one channel.
+        depth = GET_PARAM(2);
+        stype = CV_MAKE_TYPE(depth.first, 1);
+        dtype = CV_MAKE_TYPE(depth.second, 1);
+    }
+};
+
+// Compares masked cv::gpu::subtract with cv::subtract on identical random inputs.
+TEST_P(Subtract_Array_Mask, Accuracy)
+{
+    cv::Mat lhs = randomMat(size, stype);
+    cv::Mat rhs = randomMat(size, stype);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
+
+    const bool usesDouble = depth.first == CV_64F || depth.second == CV_64F;
+
+    if (usesDouble && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    {
+        // CV_64F requested on a device without NATIVE_DOUBLE: any cv::Exception
+        // raised by the call must carry CV_StsUnsupportedFormat.
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::subtract(loadMat(lhs), loadMat(rhs), dst, cv::gpu::GpuMat(), depth.second);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
+        }
+
+        return;
+    }
+
+    // GPU result; the destination is zero-filled before the masked subtraction.
+    cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+    dst.setTo(cv::Scalar::all(0));
+    cv::gpu::subtract(loadMat(lhs, useRoi), loadMat(rhs, useRoi), dst, loadMat(mask, useRoi), depth.second);
+
+    // CPU reference, also starting from a zero-filled destination.
+    cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
+    cv::subtract(lhs, rhs, dst_gold, mask, depth.second);
+
+    // Tolerance is 1e-4 when either depth is CV_32F or above, exact match otherwise.
+    EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+}
+
+// Instantiates Subtract_Array_Mask under the GPU_Core prefix for every
+// combination of the four generators below (one per fixture parameter, in the
+// same order as the PARAM_TEST_CASE type list).
+INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array_Mask, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    DEPTH_PAIRS,
+    WHOLE_SUBMAT));
+
 ////////////////////////////////////////////////////////////////////////////////
 // Subtract_Scalar

@ -541,7 +661,7 @@ TEST_P(Multiply_Array, WithOutScale)
        cv::Mat dst_gold;
        cv::multiply(mat1, mat2, dst_gold, 1, depth.second);

-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 0.0);
    }
 }

@ -571,7 +691,7 @@ TEST_P(Multiply_Array, WithScale)
        cv::Mat dst_gold;
        cv::multiply(mat1, mat2, dst_gold, scale, depth.second);

-        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, 2.0);
    }
 }

@ -726,7 +846,7 @@ TEST_P(Multiply_Scalar, WithOutScale)
        cv::Mat dst_gold;
        cv::multiply(mat, val, dst_gold, 1, depth.second);

-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
    }
 }

@ -757,7 +877,7 @@ TEST_P(Multiply_Scalar, WithScale)
        cv::Mat dst_gold;
        cv::multiply(mat, val, dst_gold, scale, depth.second);

-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
    }
 }

@ -1037,7 +1157,7 @@ TEST_P(Divide_Scalar, WithScale)
        cv::Mat dst_gold;
        cv::divide(mat, val, dst_gold, scale, depth.second);

-        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
+        EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 0.0);
    }
 }