fixed bug in gpu::transformSmart with mask

13 years ago · 6c1de21a76
parent cec641fb83
commit 6c1de21a76
3 changed files with 21 additions and 13 deletions
--- a/modules/core/src/cuda/matrix_operations.cu
+++ b/modules/core/src/cuda/matrix_operations.cu
@ -59,27 +59,32 @@ namespace cv { namespace gpu { namespace device
    ////////////////////////////////// CopyTo /////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////

-    template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream)
-    {        
-        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, channels), stream);
+    template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
+    {
+        if (colorMask)
+            cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMask(mask), stream);
+        else
+            cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
    }

-    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream)
+    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
    {
-        typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream);
+        typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);

        static func_t tab[] =
        {
+            0,
            copyToWithMask<unsigned char>,
-            copyToWithMask<signed char>,
            copyToWithMask<unsigned short>,
-            copyToWithMask<short>,
+            0,
            copyToWithMask<int>,
-            copyToWithMask<float>,
+            0,
+            0,
+            0,
            copyToWithMask<double>
        };

-        tab[depth](src, dst, mask, channels, stream);
+        tab[elemSize1](src, dst, cn, mask, colorMask, stream);
    }

    ///////////////////////////////////////////////////////////////////////////
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@ -348,7 +348,7 @@ namespace

 namespace cv { namespace gpu { namespace device
 {
-    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream);
+    void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);

    template <typename T>
    void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);
@ -405,7 +405,10 @@ namespace cv { namespace gpu
 {
    CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
    {
-        cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.depth(), src.channels(), mask, stream);
+        CV_Assert(src.size() == dst.size() && src.type() == dst.type());
+        CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));
+
+        cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
    }

    CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)
--- a/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/detail/transform_detail.hpp
@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
                if (x_shifted + ft::smart_shift - 1 < src_.cols)
                {
                    const read_type src_n_el = ((const read_type*)src)[x];
-                    write_type dst_n_el;
+                    write_type dst_n_el = ((const write_type*)dst)[x];

                    OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);

@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace device
                {
                    const read_type1 src1_n_el = ((const read_type1*)src1)[x];
                    const read_type2 src2_n_el = ((const read_type2*)src2)[x];
-                    write_type dst_n_el;
+                    write_type dst_n_el = ((const write_type*)dst)[x];
                    
                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);