fixed bug in gpu::remap under win32

pull/13383/head
Vladislav Vinogradov 13 years ago
parent ca8c5b633c
commit df9f707ff0
  1. 35
      modules/gpu/src/cuda/imgproc.cu
  2. 23
      modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
  3. 2
      samples/gpu/performance/tests.cpp

@ -67,6 +67,24 @@ namespace cv { namespace gpu { namespace imgproc
} }
} }
// Dispatcher used when remap has to be issued on a caller-supplied CUDA stream.
//
// Template parameters:
//   Filter - interpolation filter template, instantiated over the border-aware reader.
//   B      - border-handling template, instantiated over the working vector type.
//   T      - element type of the source and destination images.
//
// call() wraps `src` in a border reader and a filter, then enqueues the `remap`
// kernel on `stream`. Only the launch status is checked (cudaGetLastError); no
// synchronization call is made here, so the kernel may still be running on return.
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
    static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
    {
        // Working type: TypeVec<float, cn>, with the channel count cn taken from T.
        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
        typedef B<work_type> border_type;
        typedef BorderReader< PtrStep_<T>, border_type > reader_type;
        typedef Filter<reader_type> filter_type;

        // Border object is built from the source extent and the caller's border value.
        border_type border(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
        reader_type borderedSrc(src, border);
        filter_type filteredSrc(borderedSrc);

        // 32x8 threads per block; the grid is rounded up so it covers all of dst.
        const dim3 threads(32, 8);
        const dim3 blocks(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

        remap<<<blocks, threads, 0, stream>>>(filteredSrc, mapx, mapy, dst);

        // Reports launch-configuration errors; asynchronous execution errors are not caught here.
        cudaSafeCall( cudaGetLastError() );
    }
};
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
{ {
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue) static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue)
@ -163,22 +181,7 @@ namespace cv { namespace gpu { namespace imgproc
if (stream == 0) if (stream == 0)
RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue); RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue);
else else
callStream(src, mapx, mapy, dst, borderValue, stream); RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream);
}
static void callStream(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader< PtrStep_<T>, B<work_type> > brd_src(src, brd);
Filter< BorderReader< PtrStep_<T>, B<work_type> > > filter_src(brd_src);
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
} }
}; };

@ -758,6 +758,29 @@ namespace cv { namespace gpu { namespace device
const Ptr2D ptr; const Ptr2D ptr;
const B b; const B b;
}; };
// Under win32 there is a bug with templated types that are passed as kernel parameters;
// with this specialization everything works fine.
//
// BorderReader specialization for the constant border mode (BrdConstant<D>).
// It copies height, width and val out of the border object into plain members
// and reads through `src` only for coordinates that lie inside the image;
// every other coordinate yields the constant `val`.
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
    typedef typename BrdConstant<D>::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    // src_ : 2D accessor that is invoked as src_(y, x).
    // b    : constant-border descriptor supplying height, width and the fill value.
    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b)
        : src(src_), height(b.height), width(b.width), val(b.val)
    {
    }

    // Returns saturate_cast<D>(src(y, x)) when 0 <= x < width and 0 <= y < height,
    // otherwise the border value.
    __device__ __forceinline__ D operator ()(index_type y, index_type x) const
    {
        // The tests are kept in positive form on purpose: any comparison that
        // evaluates to false (e.g. a NaN coordinate, should index_type be a
        // floating-point type - not visible here) selects the border value.
        const bool insideCols = x >= 0 && x < width;
        const bool insideRows = y >= 0 && y < height;

        if (insideCols && insideRows)
            return saturate_cast<D>(src(y, x));

        return val;
    }

    // Member order is part of the object layout handed to kernels; keep it as is.
    const Ptr2D src;
    const int height;
    const int width;
    const D val;
};
}}} }}}
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__ #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__

@ -80,7 +80,7 @@ TEST(remap)
gpu::GpuMat d_src, d_dst, d_xmap, d_ymap; gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
int interpolation = INTER_LINEAR; int interpolation = INTER_LINEAR;
int borderMode = BORDER_CONSTANT; int borderMode = BORDER_REPLICATE;
for (int size = 1000; size <= 4000; size *= 2) for (int size = 1000; size <= 4000; size *= 2)
{ {

Loading…
Cancel
Save