fixed bug in gpu::remap under win32

14 years ago · df9f707ff0
parent ca8c5b633c
commit df9f707ff0
3 changed files with 43 additions and 17 deletions
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@ -67,6 +67,24 @@ namespace cv { namespace gpu { namespace imgproc
        }
    }
    template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
    {
        static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; 
            dim3 block(32, 8);
            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
            B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
            BorderReader< PtrStep_<T>, B<work_type> > brdSrc(src, brd);
            Filter< BorderReader< PtrStep_<T>, B<work_type> > > filter_src(brdSrc);
            remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
            cudaSafeCall( cudaGetLastError() );
        }
    };
    template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
    {
        static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue)
@ -163,22 +181,7 @@ namespace cv { namespace gpu { namespace imgproc
            if (stream == 0)
                RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue);
            else
-                callStream(src, mapx, mapy, dst, borderValue, stream);
+                RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream);
        }
        static void callStream(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
        {
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; 
            dim3 block(32, 8);
            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
            B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
            BorderReader< PtrStep_<T>, B<work_type> > brd_src(src, brd);
            Filter< BorderReader< PtrStep_<T>, B<work_type> > > filter_src(brd_src);
            remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
            cudaSafeCall( cudaGetLastError() );
        }
    };
--- a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
@ -758,6 +758,29 @@ namespace cv { namespace gpu { namespace device
        const Ptr2D ptr;
        const B b;
    };
    // Under win32 there is a bug with templated types that are passed as kernel parameters;
    // with this specialization everything works fine.
    // BorderReader specialization for the constant border mode (BrdConstant<D>).
    // Instead of holding the border object, it copies the border's height, width
    // and constant value into its own members at construction time.
    template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
    {
        typedef typename BrdConstant<D>::result_type elem_type;
        typedef typename Ptr2D::index_type index_type;

        // src_ - underlying 2D accessor, stored by value.
        // b    - constant-border descriptor; only its height, width and val are read.
        __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b)
            : src(src_), height(b.height), width(b.width), val(b.val)
        {
        }

        // Returns src(y, x) converted with saturate_cast<D> when (y, x) lies inside
        // [0, height) x [0, width); otherwise returns the constant border value.
        __device__ __forceinline__ D operator ()(index_type y, index_type x) const
        {
            const bool insideCols = x >= 0 && x < width;
            const bool insideRows = y >= 0 && y < height;

            if (insideCols && insideRows)
                return saturate_cast<D>(src(y, x));

            return val;
        }

        const Ptr2D src;   // source accessor
        const int height;  // number of valid rows, copied from the border descriptor
        const int width;   // number of valid columns, copied from the border descriptor
        const D val;       // value returned for out-of-range coordinates
    };
 }}}
 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@ -80,7 +80,7 @@ TEST(remap)
    gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
    int interpolation = INTER_LINEAR;
-    int borderMode = BORDER_CONSTANT;
+    int borderMode = BORDER_REPLICATE;
    for (int size = 1000; size <= 4000; size *= 2)
    {