restore cudaSafeCall

pull/761/head
Vladislav Vinogradov 12 years ago
parent 2bad639aee
commit b50090f850
  1. modules/core/include/opencv2/core/cuda/common.hpp (8 changes)
  2. modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp (16 changes)
  3. modules/core/src/cuda/matrix_operations.cu (26 changes)
  4. modules/core/src/cudastream.cpp (24 changes)
  5. modules/core/src/gpumat.cpp (50 changes)
  6. modules/core/src/matrix_operations.cpp (18 changes)
  7. modules/core/src/opengl_interop.cpp (20 changes)
  8. modules/gpu/src/arithm.cpp (8 changes)
  9. modules/gpu/src/cascadeclassifier.cpp (10 changes)
  10. modules/gpu/src/color.cpp (4 changes)
  11. modules/gpu/src/cuda/NV12ToARGB.cu (6 changes)
  12. modules/gpu/src/cuda/bf_knnmatch.cu (36 changes)
  13. modules/gpu/src/cuda/bf_match.cu (24 changes)
  14. modules/gpu/src/cuda/bf_radius_match.cu (16 changes)
  15. modules/gpu/src/cuda/bgfg_gmg.cu (24 changes)
  16. modules/gpu/src/cuda/bgfg_mog.cu (50 changes)
  17. modules/gpu/src/cuda/bilateral_filter.cu (6 changes)
  18. modules/gpu/src/cuda/blend.cu (8 changes)
  19. modules/gpu/src/cuda/calib3d.cu (28 changes)
  20. modules/gpu/src/cuda/canny.cu (28 changes)
  21. modules/gpu/src/cuda/ccomponetns.cu (12 changes)
  22. modules/gpu/src/cuda/clahe.cu (10 changes)
  23. modules/gpu/src/cuda/column_filter.h (8 changes)
  24. modules/gpu/src/cuda/copy_make_border.cu (4 changes)
  25. modules/gpu/src/cuda/debayer.cu (16 changes)
  26. modules/gpu/src/cuda/disp_bilateral_filter.cu (24 changes)
  27. modules/gpu/src/cuda/fast.cu (20 changes)
  28. modules/gpu/src/cuda/fgd_bgfg.cu (24 changes)
  29. modules/gpu/src/cuda/gftt.cu (10 changes)
  30. modules/gpu/src/cuda/global_motion.cu (8 changes)
  31. modules/gpu/src/cuda/hist.cu (8 changes)
  32. modules/gpu/src/cuda/hog.cu (66 changes)
  33. modules/gpu/src/cuda/hough.cu (164 changes)
  34. modules/gpu/src/cuda/imgproc.cu (94 changes)
  35. modules/gpu/src/cuda/integral_image.cu (14 changes)
  36. modules/gpu/src/cuda/lbp.cu (2 changes)
  37. modules/gpu/src/cuda/match_template.cu (56 changes)
  38. modules/gpu/src/cuda/mathfunc.cu (8 changes)
  39. modules/gpu/src/cuda/matrix_reductions.cu (44 changes)
  40. modules/gpu/src/cuda/nlm.cu (18 changes)
  41. modules/gpu/src/cuda/optflowbm.cu (8 changes)
  42. modules/gpu/src/cuda/optical_flow.cu (8 changes)
  43. modules/gpu/src/cuda/optical_flow_farneback.cu (46 changes)
  44. modules/gpu/src/cuda/orb.cu (18 changes)
  45. modules/gpu/src/cuda/pyr_down.cu (4 changes)
  46. modules/gpu/src/cuda/pyr_up.cu (4 changes)
  47. modules/gpu/src/cuda/pyrlk.cu (20 changes)
  48. modules/gpu/src/cuda/remap.cu (14 changes)
  49. modules/gpu/src/cuda/resize.cu (18 changes)
  50. modules/gpu/src/cuda/rgb_to_yv12.cu (8 changes)
  51. modules/gpu/src/cuda/row_filter.h (8 changes)
  52. modules/gpu/src/cuda/split_merge.cu (24 changes)
  53. modules/gpu/src/cuda/stereobm.cu (32 changes)
  54. modules/gpu/src/cuda/stereobp.cu (56 changes)
  55. modules/gpu/src/cuda/stereocsbp.cu (68 changes)
  56. modules/gpu/src/cuda/tvl1flow.cu (16 changes)
  57. modules/gpu/src/cuda/warp.cu (26 changes)
  58. modules/gpu/src/element_operations.cpp (24 changes)
  59. modules/gpu/src/filtering.cpp (16 changes)
  60. modules/gpu/src/graphcuts.cpp (4 changes)
  61. modules/gpu/src/hough.cpp (18 changes)
  62. modules/gpu/src/imgproc.cpp (22 changes)
  63. modules/gpu/src/matrix_reductions.cpp (12 changes)
  64. modules/gpu/src/nvidia/core/NCVPyramid.cu (8 changes)
  65. modules/gpu/src/optflowbm.cpp (4 changes)
  66. modules/gpu/src/optical_flow.cpp (4 changes)
  67. modules/gpu/src/resize.cpp (2 changes)
  68. modules/gpu/src/warp.cpp (2 changes)
  69. modules/nonfree/src/cuda/surf.cu (54 changes)
  70. modules/nonfree/src/cuda/vibe.cu (20 changes)
  71. modules/nonfree/src/surf_gpu.cpp (4 changes)
  72. modules/softcascade/src/detector_cuda.cpp (2 changes)
  73. modules/superres/src/cuda/btv_l1_gpu.cu (14 changes)

@@ -64,10 +64,12 @@ namespace cv { namespace gpu {
     }
 }}

+#ifndef cudaSafeCall
 #if defined(__GNUC__)
-    #define cvCudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
+    #define cudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
 #else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cvCudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+    #define cudaSafeCall(expr) cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
 #endif
+#endif

 namespace cv { namespace gpu

@@ -104,7 +106,7 @@ namespace cv { namespace gpu
     template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
     {
         cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-        cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+        cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
     }
 }
 }}
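For context, the restored macro wraps any cudaError_t-returning expression and routes failures to cv::gpu::checkCudaError. A minimal standalone sketch of how it is used follows; the checkCudaError body here is a simplified stand-in (the real OpenCV version raises a cv::Exception):

    #include <cuda_runtime.h>
    #include <cstdio>
    #include <cstdlib>

    namespace cv { namespace gpu {
        // Simplified stand-in: OpenCV's implementation throws cv::Exception instead.
        inline void checkCudaError(cudaError_t err, const char* file, int line, const char* func)
        {
            if (err != cudaSuccess)
            {
                fprintf(stderr, "CUDA error: %s at %s:%d (%s)\n",
                        cudaGetErrorString(err), file, line, func);
                exit(EXIT_FAILURE);
            }
        }
    }}

    #if defined(__GNUC__)
        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
    #else
        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
    #endif

    int main()
    {
        void* ptr = 0;
        cudaSafeCall( cudaMalloc(&ptr, 1024) );   // any runtime call can be wrapped
        cudaSafeCall( cudaFree(ptr) );
        return 0;
    }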

@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

             transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>

@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

             transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

     template<> struct TransformDispatcher<true>

@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

             transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>

@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

             transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
 } // namespace transform_detail
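All four dispatcher hunks restore the same launch-site pattern: check the launch with cudaGetLastError, then synchronize only when running on the default stream. Extracted as a minimal sketch (the fill kernel and launchFill wrapper are illustrative, not from the commit; cudaSafeCall as sketched earlier):

    __global__ void fill(float* data, int n, float value)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            data[i] = value;
    }

    void launchFill(float* d_data, int n, float value, cudaStream_t stream)
    {
        const dim3 block(256);
        const dim3 grid((n + block.x - 1) / block.x);   // same rounding as divUp

        fill<<<grid, block, 0, stream>>>(d_data, n, value);
        cudaSafeCall( cudaGetLastError() );             // catches launch-configuration errors

        if (stream == 0)                                // blocking semantics only on the default stream
            cudaSafeCall( cudaDeviceSynchronize() );    // surfaces errors raised inside the kernel
    }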

@@ -124,31 +124,31 @@ namespace cv { namespace gpu { namespace cudev
     void writeScalar(const uchar* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
     }
     void writeScalar(const schar* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
     }
     void writeScalar(const ushort* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
     }
     void writeScalar(const short* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
     }
     void writeScalar(const int* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
     }
     void writeScalar(const float* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
     }
     void writeScalar(const double* vals)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
+        cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
     }

     template<typename T>

@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

         set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall ( cudaDeviceSynchronize() );
+            cudaSafeCall ( cudaDeviceSynchronize() );
     }

     template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);

@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

         set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall ( cudaDeviceSynchronize() );
+            cudaSafeCall ( cudaDeviceSynchronize() );
     }

     template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);

@@ -290,8 +290,8 @@ namespace cv { namespace gpu { namespace cudev
     template<typename T, typename D, typename S>
     void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
     {
-        cvCudaSafeCall( cudaSetDoubleForDevice(&alpha) );
-        cvCudaSafeCall( cudaSetDoubleForDevice(&beta) );
+        cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
+        cudaSafeCall( cudaSetDoubleForDevice(&beta) );

         Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
         cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
     }
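The writeScalar overloads above all reduce to one operation: copying a four-element scalar from host memory into a __constant__ symbol that the set kernels read. In isolation, assuming the same symbol name as the float overload:

    __constant__ float scalar_32f[4];   // visible to every kernel in this translation unit

    void writeScalar(const float* vals)
    {
        // copy four floats from the host into the constant-memory symbol
        cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
    }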

@@ -131,14 +131,14 @@ bool cv::gpu::Stream::queryIfComplete()
     if (err == cudaErrorNotReady || err == cudaSuccess)
         return err == cudaSuccess;

-    cvCudaSafeCall(err);
+    cudaSafeCall(err);
     return false;
 }

 void cv::gpu::Stream::waitForCompletion()
 {
     cudaStream_t stream = Impl::getStream(impl);
-    cvCudaSafeCall( cudaStreamSynchronize(stream) );
+    cudaSafeCall( cudaStreamSynchronize(stream) );
 }

 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)

@@ -148,7 +148,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }

 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)

@@ -157,7 +157,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }

 void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)

@@ -166,7 +166,7 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)

@@ -175,7 +175,7 @@ void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)

@@ -184,7 +184,7 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)

@@ -201,7 +201,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
     if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
     {
-        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
         return;
     }

@@ -212,7 +212,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
     if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
     {
         int ival = saturate_cast<uchar>(val[0]);
-        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
         return;
     }
 }

@@ -299,7 +299,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat
     cudaStream_t stream = Impl::getStream(impl);

-    cvCudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
+    cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
 #else
     (void) callback;
     (void) userData;

@@ -328,7 +328,7 @@ void cv::gpu::Stream::create()
     release();

     cudaStream_t stream;
-    cvCudaSafeCall( cudaStreamCreate( &stream ) );
+    cudaSafeCall( cudaStreamCreate( &stream ) );

     impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));

@@ -340,7 +340,7 @@ void cv::gpu::Stream::release()
 {
     if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
     {
-        cvCudaSafeCall( cudaStreamDestroy(impl->stream) );
+        cudaSafeCall( cudaStreamDestroy(impl->stream) );
         cv::fastFree(impl);
     }
 }
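queryIfComplete is the one call site where the return code is inspected before reaching the checker, because cudaErrorNotReady is an expected answer from cudaStreamQuery rather than a failure. The shape of that logic, as a standalone sketch:

    bool queryIfComplete(cudaStream_t stream)
    {
        cudaError_t err = cudaStreamQuery(stream);

        if (err == cudaErrorNotReady || err == cudaSuccess)
            return err == cudaSuccess;   // "not ready" just means work is still in flight

        cudaSafeCall(err);               // anything else is reported as an error
        return false;
    }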

@@ -91,25 +91,25 @@ int cv::gpu::getCudaEnabledDeviceCount()
     if (error == cudaErrorNoDevice)
         return 0;

-    cvCudaSafeCall( error );
+    cudaSafeCall( error );
     return count;
 }

 void cv::gpu::setDevice(int device)
 {
-    cvCudaSafeCall( cudaSetDevice( device ) );
+    cudaSafeCall( cudaSetDevice( device ) );
 }

 int cv::gpu::getDevice()
 {
     int device;
-    cvCudaSafeCall( cudaGetDevice( &device ) );
+    cudaSafeCall( cudaGetDevice( &device ) );
     return device;
 }

 void cv::gpu::resetDevice()
 {
-    cvCudaSafeCall( cudaDeviceReset() );
+    cudaSafeCall( cudaDeviceReset() );
 }

 namespace

@@ -302,7 +302,7 @@ namespace
     if (!props_[devID])
     {
         props_[devID] = new cudaDeviceProp;
-        cvCudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
+        cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
     }

     return props_[devID];

@@ -322,7 +322,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
     if (prevDeviceID != device_id_)
         setDevice(device_id_);

-    cvCudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
+    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );

     if (prevDeviceID != device_id_)
         setDevice(prevDeviceID);

@@ -408,8 +408,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
     printf("Device count: %d\n", count);

     int driverVersion = 0, runtimeVersion = 0;
-    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

     const char *computeMode[] = {
         "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",

@@ -423,7 +423,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
     for(int dev = beg; dev < end; ++dev)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

         printf("\nDevice %d: \"%s\"\n", dev, prop.name);
         printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);

@@ -485,13 +485,13 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
     int end = valid ? device+1 : count;

     int driverVersion = 0, runtimeVersion = 0;
-    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

     for(int dev = beg; dev < end; ++dev)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

         const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
         printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);

@@ -983,7 +983,7 @@ namespace
             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

     template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>

@@ -998,7 +998,7 @@ namespace
             nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1040,7 +1040,7 @@ namespace
             nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

     template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>

@@ -1057,7 +1057,7 @@ namespace
             nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1088,7 +1088,7 @@ namespace
             nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

     template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>

@@ -1105,7 +1105,7 @@ namespace
             nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1131,7 +1131,7 @@ namespace
             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1148,15 +1148,15 @@ namespace
     public:
         void copy(const Mat& src, GpuMat& dst) const
         {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
         }
         void copy(const GpuMat& src, Mat& dst) const
         {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
         }
         void copy(const GpuMat& src, GpuMat& dst) const
        {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
         }

         void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const

@@ -1301,7 +1301,7 @@ namespace
     {
         if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
         {
-            cvCudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
+            cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
             return;
         }

@@ -1312,7 +1312,7 @@ namespace
         if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
         {
             int val = saturate_cast<uchar>(s[0]);
-            cvCudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
+            cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
             return;
         }
     }

@@ -1367,7 +1367,7 @@ namespace
     void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
     {
-        cvCudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
+        cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
     }

     void free(void* devPtr) const
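The setTo hunks keep GpuMat's fast path: when every byte of the fill value is identical, the whole pitched region can be cleared with cudaMemset2D instead of a kernel. A sketch of that pairing with the pitched allocation it assumes (clearPitched is a name introduced here for illustration):

    void clearPitched(size_t widthBytes, size_t rows, unsigned char value)
    {
        // allocate with a driver-chosen aligned row pitch, as mallocPitch does above
        void* devPtr = 0;
        size_t step  = 0;
        cudaSafeCall( cudaMallocPitch(&devPtr, &step, widthBytes, rows) );

        // fast path from setTo: valid whenever every channel maps to the same byte
        cudaSafeCall( cudaMemset2D(devPtr, step, value, widthBytes, rows) );

        cudaSafeCall( cudaFree(devPtr) );
    }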

@@ -191,18 +191,18 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return G
 void cv::gpu::registerPageLocked(Mat& m)
 {
-    cvCudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
+    cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
 }

 void cv::gpu::unregisterPageLocked(Mat& m)
 {
-    cvCudaSafeCall( cudaHostUnregister(m.ptr()) );
+    cudaSafeCall( cudaHostUnregister(m.ptr()) );
 }

 bool cv::gpu::CudaMem::canMapHostMemory()
 {
     cudaDeviceProp prop;
-    cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+    cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
     return (prop.canMapHostMemory != 0) ? true : false;
 }

@@ -237,7 +237,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
     if (_alloc_type == ALLOC_ZEROCOPY)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
         step = alignUpStep(step, prop.textureAlignment);
     }
     int64 _nettosize = (int64)step*rows;

@@ -252,9 +252,9 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
     switch (alloc_type)
     {
-        case ALLOC_PAGE_LOCKED:    cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
-        case ALLOC_ZEROCOPY:       cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
-        case ALLOC_WRITE_COMBINED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
+        case ALLOC_PAGE_LOCKED:    cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
+        case ALLOC_ZEROCOPY:       cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
+        case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
         default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
     }

@@ -273,7 +273,7 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
     GpuMat res;

     void *pdev;
-    cvCudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
+    cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
     res = GpuMat(rows, cols, type(), pdev, step);

     return res;

@@ -283,7 +283,7 @@ void cv::gpu::CudaMem::release()
 {
     if( refcount && CV_XADD(refcount, -1) == 1 )
     {
-        cvCudaSafeCall( cudaFreeHost(datastart ) );
+        cudaSafeCall( cudaFreeHost(datastart ) );
         fastFree(refcount);
     }

     data = datastart = dataend = 0;
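The ALLOC_ZEROCOPY branch relies on mapped, page-locked host memory; createGpuMatHeader then asks the runtime for the device-side alias of that allocation. A compressed round trip, assuming the device reported canMapHostMemory:

    void zeroCopyRoundTrip()
    {
        void* hostPtr = 0;
        void* devPtr  = 0;
        const size_t bytes = 4096;

        cudaSafeCall( cudaHostAlloc(&hostPtr, bytes, cudaHostAllocMapped) );   // the ALLOC_ZEROCOPY flag
        cudaSafeCall( cudaHostGetDevicePointer(&devPtr, hostPtr, 0) );         // flags argument must be 0

        // ... kernels write through devPtr; the host reads the same bytes via hostPtr ...

        cudaSafeCall( cudaFreeHost(hostPtr) );
    }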

@@ -133,7 +133,7 @@ void cv::gpu::setGlDevice(int device)
     (void) device;
     throw_no_cuda();
 #else
-    cvCudaSafeCall( cudaGLSetGLDevice(device) );
+    cudaSafeCall( cudaGLSetGLDevice(device) );
 #endif
 #endif
 }

@@ -184,7 +184,7 @@ namespace
         return;

     cudaGraphicsResource_t resource;
-    cvCudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
+    cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );

     release();

@@ -217,7 +217,7 @@ namespace
     CudaResource::GraphicsMapHolder::GraphicsMapHolder(cudaGraphicsResource_t* resource, cudaStream_t stream) : resource_(resource), stream_(stream)
     {
         if (resource_)
-            cvCudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
+            cudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
     }

     CudaResource::GraphicsMapHolder::~GraphicsMapHolder()

@@ -240,14 +240,14 @@ namespace
         void* dst;
         size_t size;
-        cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
+        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );

         CV_DbgAssert( width * height == size );

         if (stream == 0)
-            cvCudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
         else
-            cvCudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
+            cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
     }

     void CudaResource::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream)

@@ -259,14 +259,14 @@ namespace
         void* src;
         size_t size;
-        cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
+        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );

         CV_DbgAssert( width * height == size );

         if (stream == 0)
-            cvCudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
         else
-            cvCudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
+            cudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
     }

     void* CudaResource::map(cudaStream_t stream)

@@ -277,7 +277,7 @@ namespace
         void* ptr;
         size_t size;
-        cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
+        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );

         h.reset();
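The interop hunks all live inside CUDA's register/map/get-pointer/unmap life cycle for GL buffers; GraphicsMapHolder is just an RAII wrapper around the map/unmap pair. The cycle flattened into one sketch (copyThroughGL is a name introduced here; buffer is an existing GL buffer object id, and the code assumes <cuda_gl_interop.h> is included):

    void copyThroughGL(unsigned int buffer, cudaStream_t stream)
    {
        cudaGraphicsResource_t resource = 0;
        cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );

        cudaSafeCall( cudaGraphicsMapResources(1, &resource, stream) );

        void*  ptr  = 0;
        size_t size = 0;
        cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource) );

        // ... cudaMemcpy2D / cudaMemcpy2DAsync through ptr while the resource stays mapped ...

        cudaSafeCall( cudaGraphicsUnmapResources(1, &resource, stream) );
        cudaSafeCall( cudaGraphicsUnregisterResource(resource) );
    }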

@@ -246,7 +246,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
     }

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ////////////////////////////////////////////////////////////////////////

@@ -287,7 +287,7 @@ namespace
             (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     };
 }

@@ -402,7 +402,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
     }

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ////////////////////////////////////////////////////////////////////////

@@ -427,7 +427,7 @@ namespace
         nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }

@@ -403,7 +403,7 @@ public:
     unsigned int classified = 0;
     GpuMat dclassified(1, 1, CV_32S);
-    cvCudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
+    cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

     PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize);

@@ -448,11 +448,11 @@ public:
     if (groupThreshold <= 0 || objects.empty())
         return 0;

-    cvCudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
     cudev::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());

-    cvCudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaDeviceSynchronize() );

     return classified;
 }

@@ -481,7 +481,7 @@ private:
     roiSize.height = frame.height;

     cudaDeviceProp prop;
-    cvCudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+    cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );

     Ncv32u bufSize;
     ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
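The classifier keeps its detection counter in a one-element device buffer and shuttles it across with plain blocking cudaMemcpy. The same pattern without GpuMat, as a sketch (runAndReadCounter is a name introduced here):

    unsigned int runAndReadCounter()
    {
        unsigned int classified = 0;
        unsigned int* d_classified = 0;
        cudaSafeCall( cudaMalloc(&d_classified, sizeof(unsigned int)) );

        // reset the device-side counter
        cudaSafeCall( cudaMemcpy(d_classified, &classified, sizeof(unsigned int), cudaMemcpyHostToDevice) );

        // ... detection kernels atomically increment *d_classified ...

        // blocking read-back; also orders the host after prior default-stream work
        cudaSafeCall( cudaMemcpy(&classified, d_classified, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
        cudaSafeCall( cudaFree(d_classified) );
        return classified;
    }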

@@ -1600,7 +1600,7 @@ namespace
         nppSafeCall( nppiAlphaPremul_16u_AC4R(src.ptr<Npp16u>(), static_cast<int>(src.step), dst.ptr<Npp16u>(), static_cast<int>(dst.step), oSizeROI) );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
 #endif
     }

@@ -1942,7 +1942,7 @@ void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
     nppSafeCall( nppiSwapChannels_8u_C4IR(image.ptr<Npp8u>(), static_cast<int>(image.step), sz, dstOrder) );

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stream& stream)

@@ -60,7 +60,7 @@ namespace cv { namespace gpu { namespace cudev {
     void loadHueCSC(float hueCSC[9])
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
     }

     __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)

@@ -190,10 +190,10 @@ namespace cv { namespace gpu { namespace cudev {
         NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
                                                interopFrame.cols, interopFrame.rows);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }
 }}}

@@ -417,10 +417,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>

@@ -478,10 +478,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

 ///////////////////////////////////////////////////////////////////////////////

@@ -594,10 +594,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>

@@ -653,10 +653,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

 ///////////////////////////////////////////////////////////////////////////////

@@ -768,10 +768,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>

@@ -827,10 +827,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

 ///////////////////////////////////////////////////////////////////////////////

@@ -959,10 +959,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         calcDistanceUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, allDist);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>

@@ -1022,10 +1022,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         calcDistance<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, allDist);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

 ///////////////////////////////////////////////////////////////////////////////

@@ -1115,11 +1115,11 @@ namespace cv { namespace gpu { namespace cudev
     for (int i = 0; i < k; ++i)
     {
         findBestMatch<BLOCK_SIZE><<<grid, block, 0, stream>>>(allDist, i, trainIdx, distance);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
     }

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 void findKnnMatchDispatcher(int k, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream)
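Every matcher launch in this file computes its dynamic shared-memory budget from the template parameters before the <<< >>> launch. The recurring expression, pulled out on its own (matcherSmemSize is a name introduced here purely for illustration):

    template <int BLOCK_SIZE, int MAX_DESC_LEN>
    size_t matcherSmemSize()
    {
        // a BLOCK_SIZE x max(MAX_DESC_LEN, BLOCK_SIZE) query cache plus a
        // BLOCK_SIZE x BLOCK_SIZE train tile, both stored as ints
        return (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE)
                + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
    }

    // e.g. matcherSmemSize<16, 64>() bytes, passed as the third launch parameter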

@@ -177,10 +177,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>

@@ -236,10 +236,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

 ///////////////////////////////////////////////////////////////////////////////

@@ -335,10 +335,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>

@@ -392,10 +392,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

 ///////////////////////////////////////////////////////////////////////////////

@@ -490,10 +490,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>

@@ -546,10 +546,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

 ///////////////////////////////////////////////////////////////////////////////

@@ -122,10 +122,10 @@ namespace cv { namespace gpu { namespace cudev
         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
             trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>

@@ -153,11 +153,11 @@ namespace cv { namespace gpu { namespace cudev
             matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
         }
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
     }

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ///////////////////////////////////////////////////////////////////////////////

@@ -230,10 +230,10 @@ namespace cv { namespace gpu { namespace cudev
         match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
             trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T>

@@ -261,11 +261,11 @@ namespace cv { namespace gpu { namespace cudev
             match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                 trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
         }
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
     }

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ///////////////////////////////////////////////////////////////////////////////

@@ -62,15 +62,15 @@ namespace cv { namespace gpu { namespace cudev {
     void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior,
                        float decisionThreshold, int maxFeatures, int numInitializationFrames)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_width, &width, sizeof(width)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_height, &height, sizeof(height)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_minVal, &minVal, sizeof(minVal)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_maxVal, &maxVal, sizeof(maxVal)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_quantizationLevels, &quantizationLevels, sizeof(quantizationLevels)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_backgroundPrior, &backgroundPrior, sizeof(backgroundPrior)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_decisionThreshold, &decisionThreshold, sizeof(decisionThreshold)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_maxFeatures, &maxFeatures, sizeof(maxFeatures)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_numInitializationFrames, &numInitializationFrames, sizeof(numInitializationFrames)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_width, &width, sizeof(width)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_height, &height, sizeof(height)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_minVal, &minVal, sizeof(minVal)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_maxVal, &maxVal, sizeof(maxVal)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_quantizationLevels, &quantizationLevels, sizeof(quantizationLevels)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_backgroundPrior, &backgroundPrior, sizeof(backgroundPrior)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_decisionThreshold, &decisionThreshold, sizeof(decisionThreshold)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_maxFeatures, &maxFeatures, sizeof(maxFeatures)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_numInitializationFrames, &numInitializationFrames, sizeof(numInitializationFrames)) );
     }
     __device__ float findFeature(const int color, const PtrStepi& colors, const PtrStepf& weights, const int x, const int y, const int nfeatures)
@@ -230,14 +230,14 @@ namespace cv { namespace gpu { namespace cudev {
         const dim3 block(32, 8);
         const dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );
         update<SrcT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void update_gpu<uchar >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
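For readers unfamiliar with the pattern in the loadConstants hunk above: host-side parameters are copied once into __constant__ device variables, which kernels then read through the constant cache. A minimal hedged sketch, reusing the cudaSafeCall stand-in sketched earlier; c_width and the function names mirror the hunk but are illustrative:

// A value in the constant address space, uploaded once per configuration.
__constant__ int c_width;

void loadWidth(int width)
{
    // Host-to-device copy into the __constant__ symbol.
    cudaSafeCall( cudaMemcpyToSymbol(c_width, &width, sizeof(width)) );
}

__global__ void useWidth(int* out)
{
    // Every thread reads the same broadcast, cached value.
    out[threadIdx.x] = c_width;
}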

@@ -180,16 +180,16 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
         mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
             weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
             nmixtures, varThreshold, backgroundRatio);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 ///////////////////////////////////////////////////////////////
@@ -333,16 +333,16 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
         mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
             weight, sortKey, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
             nmixtures, varThreshold, backgroundRatio, learningRate, minVar);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 ///////////////////////////////////////////////////////////////
@@ -406,13 +406,13 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
         getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst, nmixtures, backgroundRatio);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
@@ -445,15 +445,15 @@ namespace cv { namespace gpu { namespace cudev
         varMin = ::fminf(varMin, varMax);
         varMax = ::fmaxf(varMin, varMax);
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
     }
     template <bool detectShadows, typename SrcT, typename WorkT>
@@ -665,7 +665,7 @@ namespace cv { namespace gpu { namespace cudev
         if (detectShadows)
         {
-            cvCudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
+            cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
             mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                 weight, variance, (PtrStepSz<WorkT>) mean,
@@ -673,17 +673,17 @@ namespace cv { namespace gpu { namespace cudev
         }
         else
         {
-            cvCudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
+            cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
             mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                 weight, variance, (PtrStepSz<WorkT>) mean,
                 alphaT, alpha1, prune);
         }
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
@@ -737,13 +737,13 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
         getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
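Each MOG launcher above first calls cudaFuncSetCacheConfig(..., cudaFuncCachePreferL1). On the Fermi/Kepler-era hardware this code targets, shared memory and L1 share one physical array, so a kernel that uses little shared memory can ask for the larger L1 split. A hedged, self-contained sketch of the same launch shape; dummyKernel and the wrapper are stand-ins, not OpenCV code:

__global__ void dummyKernel(const float* in, float* out, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        out[i] = 2.0f * in[i];   // placeholder per-element work
}

void launchDummy(const float* d_in, float* d_out, int n, cudaStream_t stream)
{
    // No shared memory used, so prefer the large-L1 configuration.
    cudaSafeCall( cudaFuncSetCacheConfig(dummyKernel, cudaFuncCachePreferL1) );
    dummyKernel<<<(n + 255) / 256, 256, 0, stream>>>(d_in, d_out, n);
    cudaSafeCall( cudaGetLastError() );
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}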

@@ -135,12 +135,12 @@ namespace cv { namespace gpu { namespace cudev
         float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
         float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
-        cvCudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
         bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
-        cvCudaSafeCall ( cudaGetLastError () );
+        cudaSafeCall ( cudaGetLastError () );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template<typename T>

@@ -73,10 +73,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
         blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall(cudaDeviceSynchronize());
+            cudaSafeCall(cudaDeviceSynchronize());
     }
     template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
@@ -109,10 +109,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
         blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall(cudaDeviceSynchronize());
+            cudaSafeCall(cudaDeviceSynchronize());
     }
 } // namespace blend
}}} // namespace cv { namespace gpu { namespace cudev
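Every grid in these launchers is sized with divUp, which rounds the image dimensions up to a whole number of thread blocks; the kernel's own bounds check discards the overhang. A one-liner worth spelling out once:

// Integer ceiling division: smallest block count covering `total` items.
static inline int divUp(int total, int grain)
{
    return (total + grain - 1) / grain;
}

// Usage, matching the launchers above: a 1920x1080 frame with a 32x8 block
// yields a 60x135 grid.
// dim3 block(32, 8);
// dim3 grid(divUp(1920, block.x), divUp(1080, block.y));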

@@ -75,10 +75,10 @@ namespace cv { namespace gpu { namespace cudev
                   const float* transl, PtrStepSz<float3> dst,
                   cudaStream_t stream)
         {
-            cvCudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
             cv::gpu::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
         }
     } // namespace transform_points
@@ -114,12 +114,12 @@ namespace cv { namespace gpu { namespace cudev
                  const float* transl, const float* proj, PtrStepSz<float2> dst,
                  cudaStream_t stream)
         {
-            cvCudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
-            cvCudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
+            cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
             cv::gpu::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
         }
     } // namespace project_points
@@ -174,17 +174,17 @@ namespace cv { namespace gpu { namespace cudev
               const float3* transl_vectors, const float3* object, const float2* image,
               const float dist_threshold, int* hypothesis_scores)
         {
-            cvCudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
-            cvCudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));
+            cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
+            cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));
             dim3 threads(256);
             dim3 grid(num_hypotheses);
             computeHypothesisScoresKernel<256><<<grid, threads>>>(
                 num_points, object, image, dist_threshold, hypothesis_scores);
-            cvCudaSafeCall( cudaGetLastError() );
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     } // namespace solvepnp_ransac
}}} // namespace cv { namespace gpu { namespace cudev
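The solvepnp_ransac hunk shows the two-stage error check that recurs throughout this patch: cudaGetLastError() catches a bad launch configuration synchronously, while cudaDeviceSynchronize() surfaces faults raised during kernel execution (here the sync is unconditional because hypothesis_scores is consumed on the host right away). A hedged, self-contained sketch of the pattern; scaleKernel is a stand-in:

__global__ void scaleKernel(float* data, int n, float s)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= s;
}

void scaleInPlace(float* d_data, int n, float s)
{
    scaleKernel<<<(n + 255) / 256, 256>>>(d_data, n, s);
    cudaSafeCall( cudaGetLastError() );       // launch-time failure (bad grid, etc.)
    cudaSafeCall( cudaDeviceSynchronize() );  // execution-time failure (bad access, etc.)
}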

@@ -141,9 +141,9 @@ namespace canny
             calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
         }
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall(cudaThreadSynchronize());
     }
     void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
@@ -227,9 +227,9 @@ namespace canny
         bindTexture(&tex_mag, mag);
         calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
 }
@@ -324,17 +324,17 @@ namespace canny
     void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
     {
         void* counter_ptr;
-        cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
-        cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
+        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
         const dim3 block(16, 16);
         const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
         edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
 }
@@ -435,24 +435,24 @@ namespace canny
     void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
     {
         void* counter_ptr;
-        cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
         int count;
-        cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
         while (count > 0)
         {
-            cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
+            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
             const dim3 block(128);
             const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
             edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
-            cvCudaSafeCall( cudaGetLastError() );
-            cvCudaSafeCall( cudaDeviceSynchronize() );
-            cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
            std::swap(st1, st2);
         }
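The hysteresis loop above is driven by a device-resident counter: the host zeroes it via its symbol address, the kernel bumps it atomically, and the host reads it back to decide whether another pass is needed. A hedged sketch of that idiom under the same assumptions (cudaSafeCall as sketched earlier; names illustrative):

__device__ int g_count;

__global__ void countNonZero8u(const unsigned char* mask, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n && mask[i] != 0)
        atomicAdd(&g_count, 1);   // one vote per surviving pixel
}

int hostCount(const unsigned char* d_mask, int n)
{
    void* counter_ptr;
    cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_count) );
    cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

    countNonZero8u<<<(n + 255) / 256, 256>>>(d_mask, n);
    cudaSafeCall( cudaGetLastError() );
    cudaSafeCall( cudaDeviceSynchronize() );

    int count = 0;
    cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
    return count;   // the edge-following loop repeats while this stays > 0
}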

@@ -215,9 +215,9 @@ namespace cv { namespace gpu { namespace cudev
         Int_t inInt(lo, hi);
         computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const PtrStepSz<T> >(image), edges, inInt);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void computeEdges<uchar> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
@@ -503,7 +503,7 @@ namespace cv { namespace gpu { namespace cudev
         dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));
         lableTiles<<<grid, block, 0, stream>>>(edges, comps);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         int tileSizeX = TILE_COLS, tileSizeY = TILE_ROWS;
         while (grid.x > 1 || grid.y > 1)
@@ -517,16 +517,16 @@ namespace cv { namespace gpu { namespace cudev
             tileSizeY <<= 1;
             grid = mergeGrid;
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
         }
         grid.x = divUp(edges.cols, block.x);
         grid.y = divUp(edges.rows, block.y);
         flatten<<<grid, block, 0, stream>>>(edges, comps);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }
} } }

@@ -128,10 +128,10 @@ namespace clahe
         calcLutKernel<<<grid, block, 0, stream>>>(src, lut, tileSize, tilesX, clipLimit, lutScale);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     __global__ void tranformKernel(const PtrStepSzb src, PtrStepb dst, const PtrStepb lut, const int2 tileSize, const int tilesX, const int tilesY)
@@ -173,13 +173,13 @@ namespace clahe
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(tranformKernel, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(tranformKernel, cudaFuncCachePreferL1) );
         tranformKernel<<<grid, block, 0, stream>>>(src, dst, lut, tileSize, tilesX, tilesY);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }

@@ -169,10 +169,10 @@ namespace column_filter
         linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }
@@ -363,9 +363,9 @@ namespace filter
         };
         if (stream == 0)
-            cvCudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
         else
-            cvCudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
+            cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
         callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
     }
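The filter hunk above shows the stream-aware variant of the constant upload: the null stream takes the blocking cudaMemcpyToSymbol, any other stream the async form, so the coefficients are in place before the filter kernels queued behind them. A hedged sketch of just that dispatch; the source pointer is device memory here, hence cudaMemcpyDeviceToDevice, and the names mirror the hunk but are illustrative:

__constant__ float c_kernel[32];

void uploadFilterKernel(const float* kernel, int ksize, cudaStream_t stream)
{
    if (stream == 0)
        cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
    else
        cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
}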

@@ -70,10 +70,10 @@ namespace cv { namespace gpu { namespace cudev
             BorderReader< PtrStep<T>, B<T> > brdSrc(src, brd);
             copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -347,13 +347,13 @@ namespace cv { namespace gpu { namespace cudev
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
         Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template <int cn>
@@ -364,13 +364,13 @@ namespace cv { namespace gpu { namespace cudev
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
         Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
@@ -530,10 +530,10 @@ namespace cv { namespace gpu { namespace cudev
         bindTexture(&sourceTex, src);
         MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);

@@ -61,16 +61,16 @@ namespace cv { namespace gpu { namespace cudev
     void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
+        cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
+        cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
         size_t table_space_step = table_space.step / sizeof(float);
-        cvCudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
+        cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
     }
     template <int channels>
@@ -191,20 +191,20 @@ namespace cv { namespace gpu { namespace cudev
                 for (int i = 0; i < iters; ++i)
                 {
                     disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
-                    cvCudaSafeCall( cudaGetLastError() );
+                    cudaSafeCall( cudaGetLastError() );
                     disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
-                    cvCudaSafeCall( cudaGetLastError() );
+                    cudaSafeCall( cudaGetLastError() );
                 }
                 break;
             case 3:
                 for (int i = 0; i < iters; ++i)
                 {
                     disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
-                    cvCudaSafeCall( cudaGetLastError() );
+                    cudaSafeCall( cudaGetLastError() );
                     disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
-                    cvCudaSafeCall( cudaGetLastError() );
+                    cudaSafeCall( cudaGetLastError() );
                 }
                 break;
             default:
@@ -212,7 +212,7 @@ namespace cv { namespace gpu { namespace cudev
         }
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void disp_bilateral_filter<uchar>(PtrStepSz<uchar> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
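The disparity filter above launches its kernel twice per iteration with a 0/1 flag, a checkerboard-style two-pass update, checking for launch errors after each pass and synchronizing once at the end for the null stream. A hedged, self-contained sketch of that loop shape; relaxKernel and its halving update are placeholders, not the real filter:

__global__ void relaxKernel(float* grid2d, int w, int h, int parity)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    // Only cells of one checkerboard colour update per pass.
    if (x < w && y < h && ((x + y) & 1) == parity)
        grid2d[y * w + x] *= 0.5f;   // placeholder for the real update
}

void relax(float* d_grid, int w, int h, int iters, cudaStream_t stream)
{
    const dim3 block(32, 8);
    const dim3 grid((w + block.x - 1) / block.x, (h + block.y - 1) / block.y);
    for (int i = 0; i < iters; ++i)
    {
        relaxKernel<<<grid, block, 0, stream>>>(d_grid, w, h, 0);
        cudaSafeCall( cudaGetLastError() );
        relaxKernel<<<grid, block, 0, stream>>>(d_grid, w, h, 1);
        cudaSafeCall( cudaGetLastError() );
    }
    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}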

@@ -282,7 +282,7 @@ namespace cv { namespace gpu { namespace cudev
     int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
     {
         void* counter_ptr;
-        cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
         dim3 block(32, 8);
@@ -290,7 +290,7 @@ namespace cv { namespace gpu { namespace cudev
         grid.x = divUp(img.cols - 6, block.x);
         grid.y = divUp(img.rows - 6, block.y);
-        cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
         if (score.data)
         {
@@ -307,12 +307,12 @@ namespace cv { namespace gpu { namespace cudev
             calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
         }
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
         unsigned int count;
-        cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
         return count;
     }
@@ -359,22 +359,22 @@ namespace cv { namespace gpu { namespace cudev
     int nonmaxSupression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
     {
         void* counter_ptr;
-        cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
         dim3 block(256);
         dim3 grid;
         grid.x = divUp(count, block.x);
-        cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
         nonmaxSupression<<<grid, block>>>(kpLoc, count, score, loc, response);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
         unsigned int new_count;
-        cvCudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
         return new_count;
     }

@@ -205,13 +205,13 @@ namespace bgfg
         calcPartialHistogram<PT, CT><<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_THREADBLOCK_SIZE, 0, stream>>>(
             (PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, partialBuf0, partialBuf1, partialBuf2);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         mergeHistogram<<<HISTOGRAM_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(partialBuf0, partialBuf1, partialBuf2, hist0, hist1, hist2);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void calcDiffHistogram_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);
@@ -251,10 +251,10 @@ namespace bgfg
         dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));
         calcDiffThreshMask<PT, CT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, bestThres, changeMask);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void calcDiffThreshMask_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
@@ -269,7 +269,7 @@ namespace bgfg
     void setBGPixelStat(const BGPixelStat& stat)
    {
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_stat, &stat, sizeof(BGPixelStat)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_stat, &stat, sizeof(BGPixelStat)) );
     }
     template <typename T> struct Output;
@@ -374,15 +374,15 @@ namespace bgfg
         dim3 block(32, 8);
         dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) );
         bgfgClassification<PT, CT, OT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame,
             Ftd, Fbd, foreground,
             deltaC, deltaCC, alpha2, N1c, N1cc);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
     template void bgfgClassification_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
@@ -765,17 +765,17 @@ namespace bgfg
         dim3 block(32, 8);
         dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));
-        cvCudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) );
         updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb><<<grid, block, 0, stream>>>(
             prevFrame.cols, prevFrame.rows,
             prevFrame, curFrame,
             Ftd, Fbd, foreground, background,
             deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 };
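The calcDiffHistogram hunk at the top of this file's changes uses a classic two-kernel reduction: per-block partial histograms accumulated in shared memory, then a merge kernel summing one bin per block. A hedged, single-channel sketch of that structure (bin and block counts are illustrative; launch as partialHist<<<PARTIAL_COUNT, 256>>> and mergeHist<<<BIN_COUNT, 1>>>):

#define BIN_COUNT     256
#define PARTIAL_COUNT 240   // one partial histogram per block; illustrative

__global__ void partialHist(const unsigned char* data, int n, unsigned int* partial)
{
    __shared__ unsigned int s_hist[BIN_COUNT];
    for (int i = threadIdx.x; i < BIN_COUNT; i += blockDim.x)
        s_hist[i] = 0;
    __syncthreads();

    // Grid-stride loop: atomic contention stays in fast shared memory.
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += gridDim.x * blockDim.x)
        atomicAdd(&s_hist[data[i]], 1);
    __syncthreads();

    for (int i = threadIdx.x; i < BIN_COUNT; i += blockDim.x)
        partial[blockIdx.x * BIN_COUNT + i] = s_hist[i];
}

__global__ void mergeHist(const unsigned int* partial, unsigned int* hist)
{
    // One block per bin; a single thread sums that bin across all partials.
    unsigned int sum = 0;
    for (int p = 0; p < PARTIAL_COUNT; ++p)
        sum += partial[p * BIN_COUNT + blockIdx.x];
    hist[blockIdx.x] = sum;
}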

@@ -94,9 +94,9 @@ namespace cv { namespace gpu { namespace cudev
     int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
     {
         void* counter_ptr;
-        cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
-        cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
+        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
         bindTexture(&eigTex, eig);
@@ -108,12 +108,12 @@ namespace cv { namespace gpu { namespace cudev
         else
             findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
         int count;
-        cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
         return std::min(count, max_count);
     }
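Note the std::min at the end of findCorners_gpu: the device counter may overshoot the output buffer when more corners qualify than fit, so producers reserve a slot atomically and only write while the slot is in range. A hedged sketch of that bounded-output idiom (names illustrative; the host zeroes g_counter before the launch and clamps the copied-back total, as above):

__device__ unsigned int g_counter;

__global__ void collectAbove(const float* vals, int n, float thresh, float* out, int maxOut)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n && vals[i] > thresh)
    {
        const unsigned int idx = atomicAdd(&g_counter, 1);  // reserve a slot
        if (idx < (unsigned int)maxOut)
            out[idx] = vals[i];   // only write while the slot is in bounds
    }
}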

@@ -98,8 +98,8 @@ void calcWobbleSuppressionMaps(
     int left, int idx, int right, int width, int height,
     const float *ml, const float *mr, PtrStepSzf mapx, PtrStepSzf mapy)
 {
-    cvCudaSafeCall(cudaMemcpyToSymbol(cml, ml, 9*sizeof(float)));
-    cvCudaSafeCall(cudaMemcpyToSymbol(cmr, mr, 9*sizeof(float)));
+    cudaSafeCall(cudaMemcpyToSymbol(cml, ml, 9*sizeof(float)));
+    cudaSafeCall(cudaMemcpyToSymbol(cmr, mr, 9*sizeof(float)));
     dim3 threads(32, 8);
     dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
@@ -107,8 +107,8 @@ void calcWobbleSuppressionMaps(
     calcWobbleSuppressionMapsKernel<<<grid, threads>>>(
         left, idx, right, width, height, mapx, mapy);
-    cvCudaSafeCall(cudaGetLastError());
-    cvCudaSafeCall(cudaDeviceSynchronize());
+    cudaSafeCall(cudaGetLastError());
+    cudaSafeCall(cudaDeviceSynchronize());
 }
}}}}

@@ -100,10 +100,10 @@ namespace hist
         const dim3 grid(divUp(src.rows, block.y));
         histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }
@@ -140,9 +140,9 @@ namespace hist
     void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
     {
         if (stream == 0)
-            cvCudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
         else
-            cvCudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );
+            cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );
         const float scale = 255.0f / (src.cols * src.rows);

@@ -90,23 +90,23 @@ namespace cv { namespace gpu { namespace cudev
     void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
                           int nblocks_win_x, int nblocks_win_y)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );
         int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
-        cvCudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );
         int block_hist_size_2up = power_2up(block_hist_size);
-        cvCudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );
         int descr_width = nblocks_win_x * block_hist_size;
-        cvCudaSafeCall( cudaMemcpyToSymbol(cdescr_width, &descr_width, sizeof(descr_width)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cdescr_width, &descr_width, sizeof(descr_width)) );
         int descr_size = descr_width * nblocks_win_y;
-        cvCudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
+        cudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
     }
@@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace cudev
         dim3 grid(divUp(img_block_width, nblocks), img_block_height);
         dim3 threads(32, 2, nblocks);
-        cvCudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
+        cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
                                               cudaFuncCachePreferL1));
         // Precompute gaussian spatial window parameter
@@ -217,9 +217,9 @@ namespace cv { namespace gpu { namespace cudev
         int smem = hists_size + final_hists_size;
         compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>(
             img_block_width, grad, qangle, scale, block_hists);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
@@ -318,9 +318,9 @@ namespace cv { namespace gpu { namespace cudev
         else
             CV_Error(cv::Error::StsBadArg, "normalize_hists: histogram's size is too big, try to decrease number of bins");
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
@@ -378,7 +378,7 @@ namespace cv { namespace gpu { namespace cudev
         dim3 threads(nthreads, 1, nblocks);
         dim3 grid(divUp(img_win_width, nblocks), img_win_height);
-        cvCudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
+        cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
                                               cudaFuncCachePreferL1));
         int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
@@ -386,7 +386,7 @@ namespace cv { namespace gpu { namespace cudev
         compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
             img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
             block_hists, coefs, free_coef, threshold, confidences);
-        cvCudaSafeCall(cudaThreadSynchronize());
+        cudaSafeCall(cudaThreadSynchronize());
     }
@@ -440,15 +440,15 @@ namespace cv { namespace gpu { namespace cudev
         dim3 threads(nthreads, 1, nblocks);
         dim3 grid(divUp(img_win_width, nblocks), img_win_height);
-        cvCudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
+        cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
         int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
         classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
             img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
             block_hists, coefs, free_coef, threshold, labels);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
 //----------------------------------------------------------------------------
@@ -491,9 +491,9 @@ namespace cv { namespace gpu { namespace cudev
         int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
         extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
             img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
@@ -540,9 +540,9 @@ namespace cv { namespace gpu { namespace cudev
         int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
         extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
             img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
 //----------------------------------------------------------------------------
@@ -666,9 +666,9 @@ namespace cv { namespace gpu { namespace cudev
         else
             compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
     template <int nthreads, int correct_gamma>
@@ -739,9 +739,9 @@ namespace cv { namespace gpu { namespace cudev
         else
            compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
     }
@@ -782,13 +782,13 @@ namespace cv { namespace gpu { namespace cudev
         int colOfs = 0;
         cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-        cvCudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
+        cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
         if (texOfs != 0)
         {
             colOfs = static_cast<int>( texOfs/sizeof(T) );
-            cvCudaSafeCall( cudaUnbindTexture(tex) );
-            cvCudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
+            cudaSafeCall( cudaUnbindTexture(tex) );
+            cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
         }
         dim3 threads(32, 8);
@@ -798,11 +798,11 @@ namespace cv { namespace gpu { namespace cudev
         float sy = static_cast<float>(src.rows) / dst.rows;
         resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (PtrStepSz<T>)dst, colOfs);
-        cvCudaSafeCall( cudaGetLastError() );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
-        cvCudaSafeCall( cudaUnbindTexture(tex) );
+        cudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaUnbindTexture(tex) );
     }
     void resize_8UC1(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
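The resize_for_hog hunk uses the legacy texture-reference API of that CUDA era: cudaBindTexture2D may report a nonzero byte offset when it cannot honour the pointer exactly, which the code converts into a column offset and rebinds. A hedged, standalone sketch of that dance (texSrc and bindWithOffset are illustrative; kernels add the returned offset to x when sampling):

texture<unsigned char, 2, cudaReadModeElementType> texSrc;

int bindWithOffset(const unsigned char* data, int cols, int rows, size_t step)
{
    size_t texOfs = 0;
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
    cudaSafeCall( cudaBindTexture2D(&texOfs, texSrc, data, desc, cols, rows, step) );
    int colOfs = 0;
    if (texOfs != 0)
    {
        // Fold the byte offset into a column offset, then rebind.
        colOfs = static_cast<int>(texOfs / sizeof(unsigned char));
        cudaSafeCall( cudaUnbindTexture(texSrc) );
        cudaSafeCall( cudaBindTexture2D(&texOfs, texSrc, data, desc, cols, rows, step) );
    }
    return colOfs;   // add to the x coordinate when fetching via tex2D
}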

@@ -122,22 +122,22 @@ namespace cv { namespace gpu { namespace cudev
 const int PIXELS_PER_THREAD = 16;
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 4);
 const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
+cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
 buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 return totalCount;
 }
@@ -225,9 +225,9 @@ namespace cv { namespace gpu { namespace cudev
 else
 linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -264,22 +264,22 @@ namespace cv { namespace gpu { namespace cudev
 int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 8);
 const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
 linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 totalCount = ::min(totalCount, maxSize);
@@ -462,9 +462,9 @@ namespace cv { namespace gpu { namespace cudev
 int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 8);
 const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
@@ -476,12 +476,12 @@ namespace cv { namespace gpu { namespace cudev
 rho, theta,
 lineGap, lineLength,
 mask.rows, mask.cols);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 totalCount = ::min(totalCount, maxSize);
@@ -548,12 +548,12 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 block(256);
 const dim3 grid(divUp(count, block.x));
-cvCudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
 circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -586,22 +586,22 @@ namespace cv { namespace gpu { namespace cudev
 int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 8);
 const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
 buildCentersList<<<grid, block>>>(accum, centers, threshold);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 return totalCount;
 }
@@ -662,9 +662,9 @@ namespace cv { namespace gpu { namespace cudev
 float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(has20 ? 1024 : 512);
 const dim3 grid(centersCount);
@@ -673,12 +673,12 @@ namespace cv { namespace gpu { namespace cudev
 size_t smemSize = (histSize + 2) * sizeof(int);
 circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 totalCount = ::min(totalCount, maxCircles);
@@ -768,22 +768,22 @@ namespace cv { namespace gpu { namespace cudev
 const int PIXELS_PER_THREAD = 8;
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 4);
 const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
+cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
 buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 return totalCount;
 }
@@ -824,9 +824,9 @@ namespace cv { namespace gpu { namespace cudev
 const float thetaScale = levels / (2.0f * CV_PI_F);
 buildRTable<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -877,9 +877,9 @@ namespace cv { namespace gpu { namespace cudev
 const float thetaScale = levels / (2.0f * CV_PI_F);
 GHT_Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void GHT_Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, const float dp, const int threshold)
@@ -911,22 +911,22 @@ namespace cv { namespace gpu { namespace cudev
 int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 8);
 const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
 GHT_Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 totalCount = ::min(totalCount, maxSize);
@@ -989,9 +989,9 @@ namespace cv { namespace gpu { namespace cudev
 hist, rows, cols,
 minScale, scaleStep, scaleRange,
 idp, thetaScale);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void GHT_Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange,
@@ -1037,22 +1037,22 @@ namespace cv { namespace gpu { namespace cudev
 float minScale, float scaleStep, float dp, int threshold)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 8);
 const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
 GHT_Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes, maxSize, minScale, scaleStep, dp, threshold);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 totalCount = ::min(totalCount, maxSize);
@@ -1123,9 +1123,9 @@ namespace cv { namespace gpu { namespace cudev
 hist, rows, cols,
 minAngle, angleStep, angleRange,
 idp, thetaScale);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void GHT_Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange,
@@ -1171,22 +1171,22 @@ namespace cv { namespace gpu { namespace cudev
 float minAngle, float angleStep, float dp, int threshold)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
 const dim3 block(32, 8);
 const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
 GHT_Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes, maxSize, minAngle, angleStep, dp, threshold);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 totalCount = ::min(totalCount, maxSize);
@@ -1242,7 +1242,7 @@ namespace cv { namespace gpu { namespace cudev
 tbl.r2_data = r2.data;
 tbl.r2_step = r2.step;
-cvCudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
 }
 void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
 {
@@ -1266,7 +1266,7 @@ namespace cv { namespace gpu { namespace cudev
 tbl.r2_data = r2.data;
 tbl.r2_step = r2.step;
-cvCudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
 }
 struct TemplFeatureTable
@@ -1419,9 +1419,9 @@ namespace cv { namespace gpu { namespace cudev
 sizes, maxSize,
 xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
 center, maxDist);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 thrust::device_ptr<int> sizesPtr(sizes);
 thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
@@ -1501,9 +1501,9 @@ namespace cv { namespace gpu { namespace cudev
 GHT_Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
 minAngle, maxAngle, 1.0f / angleStep, angleRange);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void GHT_Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
@@ -1566,9 +1566,9 @@ namespace cv { namespace gpu { namespace cudev
 GHT_Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
 angle, angleEpsilon,
 minScale, maxScale, iScaleStep, scaleRange);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void GHT_Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
@@ -1636,14 +1636,14 @@ namespace cv { namespace gpu { namespace cudev
 const float sinVal = ::sinf(angle);
 const float cosVal = ::cosf(angle);
-cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) );
 GHT_Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
 angle, sinVal, cosVal, angleEpsilon, scale,
 1.0f / dp);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void GHT_Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
@@ -1679,24 +1679,24 @@ namespace cv { namespace gpu { namespace cudev
 float dp, int threshold)
 {
 void* counterPtr;
-cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
-cvCudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
+cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
 const dim3 block(32, 8);
 const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
 GHT_Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
 angle, angleVotes, scale, scaleVotes,
 dp, threshold);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 int totalCount;
-cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
 totalCount = ::min(totalCount, maxSize);

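Almost every launcher in the Hough hunks above follows one host-side recipe: resolve the device-global g_counter symbol, zero it, launch a kernel whose threads atomicAdd into it, then copy the count back. A condensed, self-contained sketch of that recipe (toy countPositive kernel; error checks left as plain runtime calls where the real code would wrap them in cudaSafeCall):

    #include <cuda_runtime.h>

    __device__ int g_counter;                        // device-global tally, shared by all threads

    __global__ void countPositive(const int* data, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n && data[i] > 0)
            atomicAdd(&g_counter, 1);                // each qualifying thread bumps the counter
    }

    int countPositive_gpu(const int* d_data, int n)
    {
        void* counterPtr;
        cudaGetSymbolAddress(&counterPtr, g_counter);    // resolve the symbol's device address
        cudaMemset(counterPtr, 0, sizeof(int));          // reset before the launch

        const dim3 block(256);
        const dim3 grid((n + block.x - 1) / block.x);
        countPositive<<<grid, block>>>(d_data, n);
        cudaGetLastError();                              // catch launch-configuration errors
        cudaDeviceSynchronize();                         // catch execution errors, settle the counter

        int totalCount = 0;
        cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost);
        return totalCount;
    }

The synchronize before the memcpy is what makes the returned count trustworthy; the subsequent ::min(totalCount, maxSize) in the real launchers exists because threads may have pushed the counter past the output buffer's capacity.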
@@ -154,13 +154,13 @@ namespace cv { namespace gpu { namespace cudev
 grid.y = divUp(src.rows, threads.y);
 cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
-cvCudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
+cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
 meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
 }
@@ -173,13 +173,13 @@ namespace cv { namespace gpu { namespace cudev
 grid.y = divUp(src.rows, threads.y);
 cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
-cvCudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
+cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
 meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
 }
@@ -295,10 +295,10 @@ namespace cv { namespace gpu { namespace cudev
 grid.y = divUp(src.rows, threads.y);
 drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
@@ -309,10 +309,10 @@ namespace cv { namespace gpu { namespace cudev
 grid.y = divUp(src.rows, threads.y);
 drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
@@ -351,13 +351,13 @@ namespace cv { namespace gpu { namespace cudev
 dim3 block(32, 8);
 dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y));
-cvCudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
+cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
 reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
@@ -464,10 +464,10 @@ namespace cv { namespace gpu { namespace cudev
 break;
 }
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 /////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////
@@ -576,10 +576,10 @@ namespace cv { namespace gpu { namespace cudev
 break;
 }
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
 }
 ////////////////////////////// Column Sum //////////////////////////////////////
@@ -611,9 +611,9 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(src.cols, threads.x));
 column_sumKernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -638,10 +638,10 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
 mulSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, c);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -666,10 +666,10 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
 mulSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, c);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -695,10 +695,10 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
 mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -724,10 +724,10 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
 mulAndScaleSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, scale, c);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 //////////////////////////////////////////////////////////////////////////
@@ -837,10 +837,10 @@ namespace cv { namespace gpu { namespace cudev
 const float k_rinv[9], const float r_kinv[9], const float t[3],
 float scale, cudaStream_t stream)
 {
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
 int cols = map_x.cols;
 int rows = map_x.rows;
@@ -849,9 +849,9 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
 buildWarpMapsKernel<PlaneMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
-cvCudaSafeCall(cudaGetLastError());
+cudaSafeCall(cudaGetLastError());
 if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
 }
@@ -859,9 +859,9 @@ namespace cv { namespace gpu { namespace cudev
 const float k_rinv[9], const float r_kinv[9], float scale,
 cudaStream_t stream)
 {
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
 int cols = map_x.cols;
 int rows = map_x.rows;
@@ -870,9 +870,9 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
 buildWarpMapsKernel<CylindricalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
-cvCudaSafeCall(cudaGetLastError());
+cudaSafeCall(cudaGetLastError());
 if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
 }
@@ -880,9 +880,9 @@ namespace cv { namespace gpu { namespace cudev
 const float k_rinv[9], const float r_kinv[9], float scale,
 cudaStream_t stream)
 {
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
-cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
+cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
 int cols = map_x.cols;
 int rows = map_x.rows;
@@ -891,9 +891,9 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
 buildWarpMapsKernel<SphericalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
-cvCudaSafeCall(cudaGetLastError());
+cudaSafeCall(cudaGetLastError());
 if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
 }
 //////////////////////////////////////////////////////////////////////////
@@ -955,9 +955,9 @@ namespace cv { namespace gpu { namespace cudev
 Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
 BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
 filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
-cvCudaSafeCall( cudaGetLastError() ); \
+cudaSafeCall( cudaGetLastError() ); \
 if (stream == 0) \
-cvCudaSafeCall( cudaDeviceSynchronize() ); \
+cudaSafeCall( cudaDeviceSynchronize() ); \
 } \
 };
@@ -988,9 +988,9 @@ namespace cv { namespace gpu { namespace cudev
 };
 if (stream == 0)
-cvCudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
+cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
 else
-cvCudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
+cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
 funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
 }

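The launchers above all follow the stream contract visible in each hunk: launch on whatever cudaStream_t the caller supplied, then synchronize only for the default stream (0), so callers that manage their own streams stay fully asynchronous. A minimal sketch of that contract (hypothetical scaleKernel):

    #include <cuda_runtime.h>

    __global__ void scaleKernel(float* data, float s, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            data[i] *= s;
    }

    void scale_gpu(float* d_data, float s, int n, cudaStream_t stream)
    {
        const dim3 block(256);
        const dim3 grid((n + block.x - 1) / block.x);

        // Slot three is 0 (no dynamic shared memory); the caller's stream rides in slot four.
        scaleKernel<<<grid, block, 0, stream>>>(d_data, s, n);
        cudaGetLastError();                  // the real code wraps this in cudaSafeCall

        // Block only for the default stream; callers with their own stream
        // keep queueing work and synchronize on their own schedule.
        if (stream == 0)
            cudaDeviceSynchronize();
    }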
@@ -367,10 +367,10 @@ namespace cv { namespace gpu { namespace cudev
 // launch 1 block / row
 const int grid = img.rows;
-cvCudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
 shfl_integral_horizontal<<<grid, block, 0, stream>>>((const PtrStepSz<uint4>) img, (PtrStepSz<uint4>) integral);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 }
 {
@@ -378,11 +378,11 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(integral.cols, block.x), 1);
 shfl_integral_vertical<<<grid, block, 0, stream>>>(integral);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 }
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void shfl_integral_vertical(PtrStepSz<unsigned int> buffer, PtrStepSz<unsigned int> integral)
@@ -452,10 +452,10 @@ namespace cv { namespace gpu { namespace cudev
 const int block = blockStep;
 const int grid = img.rows;
-cvCudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
 shfl_integral_horizontal<<<grid, block, 0, stream>>>((PtrStepSz<uint4>) img, buffer);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 }
 {
@@ -463,7 +463,7 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(integral.cols, block.x), 1);
 shfl_integral_vertical<<<grid, block, 0, stream>>>((PtrStepSz<uint>)buffer, integral);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 }
 }
 }

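Both shfl_integral passes above nudge the hardware toward a larger L1 cache with cudaFuncSetCacheConfig(..., cudaFuncCachePreferL1) before launching, which suits kernels that rely on registers and warp shuffles rather than shared memory. A sketch of the idiom (hypothetical row-prefix-sum kernel; the cache preference is only a hint the driver may ignore):

    #include <cuda_runtime.h>

    __global__ void rowPrefixSum(const unsigned int* in, unsigned int* out, int width)
    {
        // One block per row; a naive sequential scan by thread 0 keeps the sketch short.
        if (threadIdx.x == 0)
        {
            unsigned int sum = 0;
            for (int x = 0; x < width; ++x)
            {
                sum += in[blockIdx.x * width + x];
                out[blockIdx.x * width + x] = sum;
            }
        }
    }

    void rowPrefixSum_gpu(const unsigned int* d_in, unsigned int* d_out,
                          int rows, int width, cudaStream_t stream)
    {
        // Kernels that barely touch shared memory can ask for a larger L1 cache.
        cudaFuncSetCacheConfig(rowPrefixSum, cudaFuncCachePreferL1);

        rowPrefixSum<<<rows, 32, 0, stream>>>(d_in, d_out, width);
        cudaGetLastError();
        if (stream == 0)
            cudaDeviceSynchronize();
    }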
@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace cudev
 int block = ncandidates;
 int smem = block * ( sizeof(int) + sizeof(int4) );
 disjoin<InSameComponint><<<1, block, smem>>>(candidates, objects, ncandidates, groupThreshold, grouping_eps, nclasses);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 }
 struct Cascade

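The disjoin launch above sizes its dynamic shared memory as block * (sizeof(int) + sizeof(int4)), i.e. it carves two per-thread arrays out of a single runtime-sized buffer. A toy sketch of that carving (hypothetical names; placing the int4 array first keeps the trailing int array aligned):

    #include <cuda_runtime.h>

    __global__ void packedShared(const int* labels, const int4* rects, int n, int4* out)
    {
        // One dynamic buffer, carved into two arrays: n int4s, then n ints.
        extern __shared__ int4 smem[];
        int4* sRects  = smem;
        int*  sLabels = (int*)(smem + n);

        int tid = threadIdx.x;
        if (tid < n)
        {
            sRects[tid]  = rects[tid];
            sLabels[tid] = labels[tid];
        }
        __syncthreads();

        // Toy use of both staged arrays; assumes labels is a permutation of 0..n-1.
        if (tid < n)
            out[sLabels[tid]] = sRects[tid];
    }

    void packedShared_gpu(const int* d_labels, const int4* d_rects, int n, int4* d_out)
    {
        // Same sizing formula as the disjoin launch above (single block, n <= 1024).
        const int smemBytes = n * (sizeof(int) + sizeof(int4));
        packedShared<<<1, n, smemBytes>>>(d_labels, d_rects, n, d_out);
        cudaGetLastError();
        cudaDeviceSynchronize();
    }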
@@ -114,10 +114,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
 matchTemplateNaiveKernel_CCORR<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
@@ -184,10 +184,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
 matchTemplateNaiveKernel_SQDIFF<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
@@ -240,10 +240,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
 matchTemplatePreparedKernel_SQDIFF_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
@@ -312,10 +312,10 @@ namespace cv { namespace gpu { namespace cudev
 const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
 matchTemplatePreparedKernel_SQDIFF_NORMED_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -355,10 +355,10 @@ namespace cv { namespace gpu { namespace cudev
 dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
 matchTemplatePreparedKernel_CCOFF_8U<<<grid, threads, 0, stream>>>(w, h, (float)templ_sum / (w * h), image_sum, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -399,10 +399,10 @@ namespace cv { namespace gpu { namespace cudev
 matchTemplatePreparedKernel_CCOFF_8UC2<<<grid, threads, 0, stream>>>(
 w, h, (float)templ_sum_r / (w * h), (float)templ_sum_g / (w * h),
 image_sum_r, image_sum_g, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -457,10 +457,10 @@ namespace cv { namespace gpu { namespace cudev
 (float)templ_sum_g / (w * h),
 (float)templ_sum_b / (w * h),
 image_sum_r, image_sum_g, image_sum_b, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -525,10 +525,10 @@ namespace cv { namespace gpu { namespace cudev
 (float)templ_sum_a / (w * h),
 image_sum_r, image_sum_g, image_sum_b, image_sum_a,
 result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 //////////////////////////////////////////////////////////////////////
@@ -574,10 +574,10 @@ namespace cv { namespace gpu { namespace cudev
 matchTemplatePreparedKernel_CCOFF_NORMED_8U<<<grid, threads, 0, stream>>>(
 w, h, weight, templ_sum_scale, templ_sqsum_scale,
 image_sum, image_sqsum, result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -640,10 +640,10 @@ namespace cv { namespace gpu { namespace cudev
 image_sum_r, image_sqsum_r,
 image_sum_g, image_sqsum_g,
 result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -720,10 +720,10 @@ namespace cv { namespace gpu { namespace cudev
 image_sum_g, image_sqsum_g,
 image_sum_b, image_sqsum_b,
 result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -812,10 +812,10 @@ namespace cv { namespace gpu { namespace cudev
 image_sum_b, image_sqsum_b,
 image_sum_a, image_sqsum_a,
 result);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 //////////////////////////////////////////////////////////////////////
@@ -860,10 +860,10 @@ namespace cv { namespace gpu { namespace cudev
 break;
 }
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 //////////////////////////////////////////////////////////////////////
@@ -904,10 +904,10 @@ namespace cv { namespace gpu { namespace cudev
 extractFirstChannel_32F<4><<<grid, threads, 0, stream>>>(image, result);
 break;
 }
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 } //namespace match_template
 }}} // namespace cv { namespace gpu { namespace cudev

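Every 2D launcher in the match_template hunks derives its grid the same way: divUp(extent, block) per axis, so partial tiles on the right and bottom edges still get a block, and the kernel bounds-checks each thread. A sketch with divUp written out (hypothetical fill kernel):

    #include <cuda_runtime.h>

    static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

    __global__ void fill(float* img, size_t pitchElems, int cols, int rows, float v)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < cols && y < rows)          // edge blocks are partially outside the image
            img[y * pitchElems + x] = v;
    }

    void fill_gpu(float* d_img, size_t pitchElems, int cols, int rows, float v)
    {
        const dim3 block(32, 8);           // the same block shape most hunks above use
        const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
        fill<<<grid, block>>>(d_img, pitchElems, cols, rows, v);
        cudaGetLastError();
        cudaDeviceSynchronize();
    }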
@@ -145,10 +145,10 @@ namespace cv { namespace gpu { namespace cudev
 cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
 x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
 mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
@@ -194,10 +194,10 @@ namespace cv { namespace gpu { namespace cudev
 polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
 angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)

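cartToPolar and polarToCart above pass step/elemSize() for every matrix, converting a byte pitch into an element pitch so the kernel can index row-major with plain element arithmetic. A worked sketch of that conversion (hypothetical magnitude kernel):

    #include <cuda_runtime.h>

    __global__ void magnitude(const float* x, size_t xStep,   // steps in elements, not bytes
                              const float* y, size_t yStep,
                              float* mag, size_t magStep, int cols, int rows)
    {
        int c = blockIdx.x * blockDim.x + threadIdx.x;
        int r = blockIdx.y * blockDim.y + threadIdx.y;
        if (c < cols && r < rows)
            mag[r * magStep + c] = hypotf(x[r * xStep + c], y[r * yStep + c]);
    }

    void magnitude_gpu(const float* d_x, size_t xStepBytes,
                       const float* d_y, size_t yStepBytes,
                       float* d_mag, size_t magStepBytes, int cols, int rows)
    {
        const dim3 block(32, 8);
        const dim3 grid((cols + block.x - 1) / block.x, (rows + block.y - 1) / block.y);

        // step/elemSize() in the hunks above is exactly this bytes-to-elements division.
        magnitude<<<grid, block>>>(d_x, xStepBytes / sizeof(float),
                                   d_y, yStepBytes / sizeof(float),
                                   d_mag, magStepBytes / sizeof(float), cols, rows);
        cudaGetLastError();
        cudaDeviceSynchronize();
    }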
@@ -432,12 +432,12 @@ namespace sum
 kernel<threads_x * threads_y><<<grid, block>>>(src, buf, SingleMask(mask), op, twidth, theight);
 else
 kernel<threads_x * threads_y><<<grid, block>>>(src, buf, WithOutMask(), op, twidth, theight);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 R result[4] = {0, 0, 0, 0};
-cvCudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) );
 out[0] = result[0];
 out[1] = result[1];
@@ -761,13 +761,13 @@ namespace minMax
 else
 kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, twidth, theight);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 R minval_, maxval_;
-cvCudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
-cvCudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
 *minval = minval_;
 *maxval = maxval_;
 }
@@ -934,22 +934,22 @@ namespace minMaxLoc
 else
 kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 T minval_, maxval_;
-cvCudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
-cvCudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
 *minval = minval_;
 *maxval = maxval_;
 unsigned int minloc_, maxloc_;
-cvCudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
-cvCudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
 minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
 maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
 }
@@ -1065,15 +1065,15 @@ namespace countNonZero
 unsigned int* count_buf = buf.ptr(0);
-cvCudaSafeCall( cudaMemset(count_buf, 0, sizeof(unsigned int)) );
+cudaSafeCall( cudaMemset(count_buf, 0, sizeof(unsigned int)) );
 kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, count_buf, twidth, theight);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 unsigned int count;
-cvCudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost));
 return count;
 }
@@ -1236,10 +1236,10 @@ namespace reduce
 Op op;
 rowsKernel<T, S, D, Op><<<grid, block, 0, stream>>>(src, dst, op);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 template <typename T, typename S, typename D>
@@ -1316,10 +1316,10 @@ namespace reduce
 Op op;
 colsKernel<BLOCK_SIZE, T, S, D, cn, Op><<<grid, block, 0, stream>>>((PtrStepSz<typename TypeVec<T, cn>::vec_type>) src, (typename TypeVec<D, cn>::vec_type*) dst, op);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }

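minMaxLoc above returns flat element indices from the device and decodes them on the host as row = idx / cols, col = idx - row * cols. A compact sketch of the readback and decode (a deliberately naive single-thread scan stands in for the real two-pass block reduction; contiguous, unpitched input assumed):

    #include <cuda_runtime.h>

    __global__ void argMax(const float* img, int cols, int rows,
                           float* maxVal, unsigned int* maxIdx)
    {
        if (threadIdx.x == 0)            // sequential scan keeps the sketch tiny
        {
            float best = img[0];
            unsigned int bestIdx = 0;
            for (unsigned int i = 1; i < (unsigned int)(cols * rows); ++i)
                if (img[i] > best) { best = img[i]; bestIdx = i; }
            *maxVal = best;
            *maxIdx = bestIdx;
        }
    }

    void argMax_gpu(const float* d_img, int cols, int rows, float* val, int loc[2])
    {
        float* d_val;        cudaMalloc(&d_val, sizeof(float));
        unsigned int* d_idx; cudaMalloc(&d_idx, sizeof(unsigned int));

        argMax<<<1, 1>>>(d_img, cols, rows, d_val, d_idx);
        cudaGetLastError();
        cudaDeviceSynchronize();

        unsigned int idx;
        cudaMemcpy(val,  d_val, sizeof(float),        cudaMemcpyDeviceToHost);
        cudaMemcpy(&idx, d_idx, sizeof(unsigned int), cudaMemcpyDeviceToHost);

        loc[1] = idx / cols;             // row, exactly as in the minMaxLoc hunk
        loc[0] = idx - loc[1] * cols;    // col

        cudaFree(d_val);
        cudaFree(d_idx);
    }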
@@ -146,12 +146,12 @@ namespace cv { namespace gpu { namespace cudev
 float minus_h2_inv = -1.f/(h * h * VecTraits<T>::cn);
 float noise_mult = minus_h2_inv/(block_window * block_window);
-cvCudaSafeCall( cudaFuncSetCacheConfig (nlm_kernel<T, B<T> >, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig (nlm_kernel<T, B<T> >, cudaFuncCachePreferL1) );
 nlm_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, search_radius, block_radius, noise_mult);
-cvCudaSafeCall ( cudaGetLastError () );
+cudaSafeCall ( cudaGetLastError () );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 template<typename T>
@@ -505,9 +505,9 @@ namespace cv { namespace gpu { namespace cudev
 fast_nlm_kernel<<<grid, block, smem>>>(fnlm, (PtrStepSz<T>)dst);
-cvCudaSafeCall ( cudaGetLastError () );
+cudaSafeCall ( cudaGetLastError () );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 template void nlm_fast_gpu<uchar>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
@@ -535,9 +535,9 @@ namespace cv { namespace gpu { namespace cudev
 dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y));
 fnlm_split_kernel<<<g, b>>>(lab, l, ab);
-cvCudaSafeCall ( cudaGetLastError () );
+cudaSafeCall ( cudaGetLastError () );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 __global__ void fnlm_merge_kernel(const PtrStepb l, const PtrStep<uchar2> ab, PtrStepSz<uchar3> lab)
@@ -558,9 +558,9 @@ namespace cv { namespace gpu { namespace cudev
 dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y));
 fnlm_merge_kernel<<<g, b>>>(l, ab, lab);
-cvCudaSafeCall ( cudaGetLastError () );
+cudaSafeCall ( cudaGetLastError () );
 if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 }
 }}}

@ -159,10 +159,10 @@ namespace optflowbm
calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious, calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious,
maxX, maxY, acceptLevel, escapeLevel, ss, ssCount); maxX, maxY, acceptLevel, escapeLevel, ss, ssCount);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
} }
@ -402,10 +402,10 @@ namespace optflowbm_fast
size_t smem = search_window * search_window * sizeof(int); size_t smem = search_window * search_window * sizeof(int);
optflowbm_fast_kernel<<<grid, block, smem, stream>>>(fbm, velx, vely); optflowbm_fast_kernel<<<grid, block, smem, stream>>>(fbm, velx, vely);
cvCudaSafeCall ( cudaGetLastError () ); cudaSafeCall ( cudaGetLastError () );
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
template void calc<uchar>(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream); template void calc<uchar>(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);

@ -119,9 +119,9 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(u_avg.cols, u_avg.rows); const dim3 grid(u_avg.cols, u_avg.rows);
NeedleMapAverageKernel<<<grid, block>>>(u, v, u_avg, v_avg); NeedleMapAverageKernel<<<grid, block>>>(u, v, u_avg, v_avg);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
__global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale) __global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
@ -210,9 +210,9 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y)); const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y));
NeedleMapVertexKernel<<<grid, block>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale); NeedleMapVertexKernel<<<grid, block>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
} }
}}} }}}

@ -123,13 +123,13 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
int polyN, const float *g, const float *xg, const float *xxg, int polyN, const float *g, const float *xg, const float *xxg,
float ig11, float ig03, float ig33, float ig55) float ig11, float ig03, float ig33, float ig55)
{ {
cvCudaSafeCall(cudaMemcpyToSymbol(c_g, g, (polyN + 1) * sizeof(*g))); cudaSafeCall(cudaMemcpyToSymbol(c_g, g, (polyN + 1) * sizeof(*g)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_xg, xg, (polyN + 1) * sizeof(*xg))); cudaSafeCall(cudaMemcpyToSymbol(c_xg, xg, (polyN + 1) * sizeof(*xg)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_xxg, xxg, (polyN + 1) * sizeof(*xxg))); cudaSafeCall(cudaMemcpyToSymbol(c_xxg, xxg, (polyN + 1) * sizeof(*xxg)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig11, &ig11, sizeof(ig11))); cudaSafeCall(cudaMemcpyToSymbol(c_ig11, &ig11, sizeof(ig11)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig03, &ig03, sizeof(ig03))); cudaSafeCall(cudaMemcpyToSymbol(c_ig03, &ig03, sizeof(ig03)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig33, &ig33, sizeof(ig33))); cudaSafeCall(cudaMemcpyToSymbol(c_ig33, &ig33, sizeof(ig33)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig55, &ig55, sizeof(ig55))); cudaSafeCall(cudaMemcpyToSymbol(c_ig55, &ig55, sizeof(ig55)));
} }
@ -144,10 +144,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
else if (polyN == 7) else if (polyN == 7)
polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst); polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
cvCudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
if (stream == 0) if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
} }
@ -244,7 +244,7 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
void setUpdateMatricesConsts() void setUpdateMatricesConsts()
{ {
static const float border[BORDER_SIZE + 1] = {0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f}; static const float border[BORDER_SIZE + 1] = {0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f};
cvCudaSafeCall(cudaMemcpyToSymbol(c_border, border, (BORDER_SIZE + 1) * sizeof(*border))); cudaSafeCall(cudaMemcpyToSymbol(c_border, border, (BORDER_SIZE + 1) * sizeof(*border)));
} }
@ -257,10 +257,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
updateMatrices<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, flowx, flowy, R0, R1, M); updateMatrices<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, flowx, flowy, R0, R1, M);
cvCudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
if (stream == 0) if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
} }
@ -293,10 +293,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
updateFlow<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, M, flowx, flowy); updateFlow<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, M, flowx, flowy);
cvCudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
if (stream == 0) if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
} }
@ -424,10 +424,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf)); float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst); boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);
cvCudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
if (stream == 0) if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
} }
@ -443,10 +443,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf)); float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst); boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);
cvCudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
if (stream == 0) if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
} }
@ -494,7 +494,7 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
void setGaussianBlurKernel(const float *gKer, int ksizeHalf) void setGaussianBlurKernel(const float *gKer, int ksizeHalf)
{ {
cvCudaSafeCall(cudaMemcpyToSymbol(c_gKer, gKer, (ksizeHalf + 1) * sizeof(*gKer))); cudaSafeCall(cudaMemcpyToSymbol(c_gKer, gKer, (ksizeHalf + 1) * sizeof(*gKer)));
} }
@ -511,10 +511,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
gaussianBlur<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst); gaussianBlur<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);
cvCudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
if (stream == 0) if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
} }
@ -606,10 +606,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
gaussianBlur5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst); gaussianBlur5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);
cvCudaSafeCall(cudaGetLastError()); cudaSafeCall(cudaGetLastError());
if (stream == 0) if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
} }

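The launch hunks above and below all follow one discipline: launch the kernel, verify the launch immediately with cudaGetLastError(), and call cudaDeviceSynchronize() only when running on the default (null) stream, so callers that pass a real stream stay asynchronous. A minimal sketch of that pattern, reusing the cudaSafeCall sketch above with an illustrative kernel that is not part of this patch:

#include <cuda_runtime.h>

__global__ void scaleKernel(float* data, int n, float s)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= s;
}

void scale(float* d_data, int n, float s, cudaStream_t stream)
{
    const dim3 block(256);
    const dim3 grid((n + block.x - 1) / block.x);  // divUp(n, block.x)

    scaleKernel<<<grid, block, 0, stream>>>(d_data, n, s);
    cudaSafeCall( cudaGetLastError() );            // catches launch-configuration errors

    if (stream == 0)                               // default stream: block until done
        cudaSafeCall( cudaDeviceSynchronize() );
}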
@@ -132,10 +132,10 @@ namespace cv { namespace gpu { namespace cudev
HarrisResponses<<<grid, block, 0, stream>>>(img, loc, response, npoints, blockSize, harris_k);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -145,7 +145,7 @@ namespace cv { namespace gpu { namespace cudev
void loadUMax(const int* u_max, int count)
{
-cvCudaSafeCall( cudaMemcpyToSymbol(c_u_max, u_max, count * sizeof(int)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_u_max, u_max, count * sizeof(int)) );
}
__global__ void IC_Angle(const PtrStepb image, const short2* loc_, float* angle, const int npoints, const int half_k)
@@ -214,10 +214,10 @@ namespace cv { namespace gpu { namespace cudev
IC_Angle<<<grid, block, 0, stream>>>(image, loc, angle, npoints, half_k);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -382,10 +382,10 @@ namespace cv { namespace gpu { namespace cudev
break;
}
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -413,10 +413,10 @@ namespace cv { namespace gpu { namespace cudev
mergeLocation<<<grid, block, 0, stream>>>(loc, x, y, npoints, scale);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}

@@ -181,10 +181,10 @@ namespace cv { namespace gpu { namespace cudev
B<T> b(src.rows, src.cols);
pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)

@@ -150,10 +150,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
pyrUp<<<grid, block, 0, stream>>>(src, dst);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)

@@ -320,10 +320,10 @@ namespace pyrlk
else
sparseKernel<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <bool calcErr>
@@ -474,14 +474,14 @@ namespace pyrlk
void loadConstants(int2 winSize, int iters)
{
-cvCudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
-cvCudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
-cvCudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
-cvCudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
-cvCudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
+cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
}
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
@@ -544,16 +544,16 @@ namespace pyrlk
if (err.data)
{
denseKernel<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
}
else
{
denseKernel<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
}

@@ -81,7 +81,7 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
}
};
@@ -102,9 +102,9 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -135,8 +135,8 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
-cvCudaSafeCall( cudaGetLastError() ); \
-cvCudaSafeCall( cudaDeviceSynchronize() ); \
+cudaSafeCall( cudaGetLastError() ); \
+cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
@@ -160,8 +160,8 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
} \
-cvCudaSafeCall( cudaGetLastError() ); \
-cvCudaSafeCall( cudaDeviceSynchronize() ); \
+cudaSafeCall( cudaGetLastError() ); \
+cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};

@@ -92,7 +92,7 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
}
};
@@ -107,9 +107,9 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -123,9 +123,9 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -145,9 +145,9 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
};
@@ -185,8 +185,8 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
-cvCudaSafeCall( cudaGetLastError() ); \
-cvCudaSafeCall( cudaDeviceSynchronize() ); \
+cudaSafeCall( cudaGetLastError() ); \
+cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};

@@ -140,9 +140,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
Gray_to_YV12<<<grid, block>>>(src, dst);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
@@ -153,9 +153,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
BGR_to_YV12<<<grid, block>>>(static_cast< PtrStepSz<src_t> >(src), dst);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst)

@@ -168,10 +168,10 @@ namespace row_filter
B<T> brd(src.cols);
linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
}
@@ -362,9 +362,9 @@ namespace filter
};
if (stream == 0)
-cvCudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
+cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
-cvCudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
+cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}

@@ -236,10 +236,10 @@ namespace cv { namespace gpu { namespace cudev
src[0].data, src[0].step,
src[1].data, src[1].step,
dst.rows, dst.cols, dst.data, dst.step);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
}
@@ -253,10 +253,10 @@ namespace cv { namespace gpu { namespace cudev
src[1].data, src[1].step,
src[2].data, src[2].step,
dst.rows, dst.cols, dst.data, dst.step);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
}
@@ -271,10 +271,10 @@ namespace cv { namespace gpu { namespace cudev
src[2].data, src[2].step,
src[3].data, src[3].step,
dst.rows, dst.cols, dst.data, dst.step);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
}
@@ -445,10 +445,10 @@ namespace cv { namespace gpu { namespace cudev
src.data, src.step, src.rows, src.cols,
dst[0].data, dst[0].step,
dst[1].data, dst[1].step);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
}
@@ -462,10 +462,10 @@ namespace cv { namespace gpu { namespace cudev
dst[0].data, dst[0].step,
dst[1].data, dst[1].step,
dst[2].data, dst[2].step);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
}
@@ -480,10 +480,10 @@ namespace cv { namespace gpu { namespace cudev
dst[1].data, dst[1].step,
dst[2].data, dst[2].step,
dst[3].data, dst[3].step);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall(cudaDeviceSynchronize());
+cudaSafeCall(cudaDeviceSynchronize());
}

@@ -322,10 +322,10 @@ namespace cv { namespace gpu { namespace cudev
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.data, right.data, left.step, disp, maxdisp);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
};
typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream);
@@ -353,15 +353,15 @@ namespace cv { namespace gpu { namespace cudev
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );
-cvCudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
-cvCudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
-cvCudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
-cvCudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
-cvCudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
+cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
+cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
+cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
+cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
+cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
-cvCudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
+cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
callers[winsz2](left, right, disp, maxdisp, stream);
}
@@ -392,7 +392,7 @@ namespace cv { namespace gpu { namespace cudev
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, cudaStream_t & stream)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
-cvCudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
+cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
@@ -401,12 +401,12 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(input.rows, threads.y);
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
-cvCudaSafeCall( cudaUnbindTexture (texForSobel ) );
+cudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaUnbindTexture (texForSobel ) );
}
@@ -516,7 +516,7 @@ namespace cv { namespace gpu { namespace cudev
texForTF.addressMode[1] = cudaAddressModeWrap;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
-cvCudaSafeCall( cudaBindTexture2D( 0, texForTF, input.data, desc, input.cols, input.rows, input.step ) );
+cudaSafeCall( cudaBindTexture2D( 0, texForTF, input.data, desc, input.cols, input.rows, input.step ) );
dim3 threads(128, 1, 1);
dim3 grid(1, 1, 1);
@@ -526,12 +526,12 @@ namespace cv { namespace gpu { namespace cudev
size_t smem_size = (threads.x + threads.x + (winsz/2) * 2 ) * sizeof(float);
textureness_kernel<<<grid, threads, smem_size, stream>>>(disp, winsz, avgTexturenessThreshold);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
-cvCudaSafeCall( cudaUnbindTexture (texForTF) );
+cudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaUnbindTexture (texForTF) );
}
} // namespace stereobm
}}} // namespace cv { namespace gpu { namespace cudev

@@ -62,11 +62,11 @@ namespace cv { namespace gpu { namespace cudev
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
{
-cvCudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
-cvCudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
-cvCudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
-cvCudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
-cvCudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
+cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
+cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
+cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
+cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
+cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
}
///////////////////////////////////////////////////////////////
@@ -172,10 +172,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);
comp_data<1, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);
comp_data<1, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar3, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
@@ -201,10 +201,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);
comp_data<3, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar3, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
@@ -215,10 +215,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);
comp_data<3, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar4, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
@@ -230,10 +230,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);
comp_data<4, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar4, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
@@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);
comp_data<4, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
///////////////////////////////////////////////////////////////
@@ -284,10 +284,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(dst_rows, threads.y);
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
@@ -328,19 +328,19 @@ namespace cv { namespace gpu { namespace cudev
int src_idx = (dst_idx + 1) & 1;
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mus[src_idx], (PtrStepSz<T>)mus[dst_idx]);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mds[src_idx], (PtrStepSz<T>)mds[dst_idx]);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mls[src_idx], (PtrStepSz<T>)mls[dst_idx]);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mrs[src_idx], (PtrStepSz<T>)mrs[dst_idx]);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
@@ -459,10 +459,10 @@ namespace cv { namespace gpu { namespace cudev
for(int t = 0; t < iters; ++t)
{
one_iteration<T><<<grid, threads, 0, stream>>>(t, elem_step, (T*)u.data, (T*)d.data, (T*)l.data, (T*)r.data, (PtrStepSz<T>)data, cols, rows);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
}
@@ -524,10 +524,10 @@ namespace cv { namespace gpu { namespace cudev
int elem_step = static_cast<int>(u.step/sizeof(T));
output<T><<<grid, threads, 0, stream>>>(elem_step, (const T*)u.data, (const T*)d.data, (const T*)l.data, (const T*)r.data, (const T*)data.data, disp);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
if (stream == 0)
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
}
template void output_gpu<short>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);

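The load_constants()-style hunks (Farneback, pyrlk, and the stereo kernels above and below) upload their tuning parameters into __constant__ device memory with cudaMemcpyToSymbol, each upload now wrapped in cudaSafeCall again. A minimal sketch of the idiom, reusing the cudaSafeCall sketch above with illustrative symbols rather than the ones in the patch:

#include <cuda_runtime.h>

// Device-side constants, analogous to cndisp / cmax_data_term in the hunks.
__constant__ int   c_ndisp;
__constant__ float c_max_data_term;

// Host-side upload, mirroring the patch's load_constants() functions.
void loadConstantsSketch(int ndisp, float max_data_term)
{
    cudaSafeCall( cudaMemcpyToSymbol(c_ndisp, &ndisp, sizeof(int)) );
    cudaSafeCall( cudaMemcpyToSymbol(c_max_data_term, &max_data_term, sizeof(float)) );
}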
@ -78,20 +78,20 @@ namespace cv { namespace gpu { namespace cudev
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th, void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp) const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp)
{ {
cvCudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) ); cudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) ); cudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) );
cvCudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) ); cudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) );
} }
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
@ -362,14 +362,14 @@ namespace cv { namespace gpu { namespace cudev
}; };
size_t disp_step = msg_step * h; size_t disp_step = msg_step * h;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream); init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
@ -382,10 +382,10 @@ namespace cv { namespace gpu { namespace cudev
else else
get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane); get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step, template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step,
@ -546,15 +546,15 @@ namespace cv { namespace gpu { namespace cudev
size_t disp_step1 = msg_step * h; size_t disp_step1 = msg_step * h;
size_t disp_step2 = msg_step * h2; size_t disp_step2 = msg_step * h2;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream); callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step, template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step,
@ -662,9 +662,9 @@ namespace cv { namespace gpu { namespace cudev
size_t disp_step1 = msg_step * h; size_t disp_step1 = msg_step * h;
size_t disp_step2 = msg_step * h2; size_t disp_step2 = msg_step * h2;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
@ -677,10 +677,10 @@ namespace cv { namespace gpu { namespace cudev
selected_disp_pyr_new, selected_disp_pyr_cur, selected_disp_pyr_new, selected_disp_pyr_cur,
data_cost_selected, data_cost, data_cost_selected, data_cost,
h, w, nr_plane, h2, w2, nr_plane2); h, w, nr_plane, h2, w2, nr_plane2);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
@ -767,8 +767,8 @@ namespace cv { namespace gpu { namespace cudev
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream) const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream)
{ {
size_t disp_step = msg_step * h; size_t disp_step = msg_step * h;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
@ -779,10 +779,10 @@ namespace cv { namespace gpu { namespace cudev
for(int t = 0; t < iters; ++t) for(int t = 0; t < iters; ++t)
{ {
compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1); compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
} }
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
}; };
template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step, template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,
@ -837,8 +837,8 @@ namespace cv { namespace gpu { namespace cudev
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream) const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream)
{ {
size_t disp_step = disp.rows * msg_step; size_t disp_step = disp.rows * msg_step;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) ); cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
@ -847,10 +847,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(disp.rows, threads.y); grid.y = divUp(disp.rows, threads.y);
compute_disp<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane); compute_disp<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step, template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,

@ -72,9 +72,9 @@ namespace tvl1flow
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
centeredGradientKernel<<<grid, block>>>(src, dx, dy); centeredGradientKernel<<<grid, block>>>(src, dx, dy);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
} }
@ -174,9 +174,9 @@ namespace tvl1flow
bindTexture(&tex_I1y, I1y); bindTexture(&tex_I1y, I1y);
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
} }
@ -280,9 +280,9 @@ namespace tvl1flow
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
} }
@ -323,9 +323,9 @@ namespace tvl1flow
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y)); const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut); estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
cvCudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cvCudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
} }

@@ -102,22 +102,22 @@ namespace cv { namespace gpu { namespace cudev
     dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
     buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
-    cvCudaSafeCall( cudaGetLastError() );
+    cudaSafeCall( cudaGetLastError() );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
     buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
 }
 void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
     buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
 }
@@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace cudev
     Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
     warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
-    cvCudaSafeCall( cudaGetLastError() );
+    cudaSafeCall( cudaGetLastError() );
 }
 };
@@ -174,9 +174,9 @@ namespace cv { namespace gpu { namespace cudev
     Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
     warp<Transform><<<grid, block>>>(filter_src, dst);
-    cvCudaSafeCall( cudaGetLastError() );
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaGetLastError() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
@@ -206,8 +206,8 @@ namespace cv { namespace gpu { namespace cudev
     BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
     Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
     warp<Transform><<<grid, block>>>(filter_src, dst); \
-    cvCudaSafeCall( cudaGetLastError() ); \
-    cvCudaSafeCall( cudaDeviceSynchronize() ); \
+    cudaSafeCall( cudaGetLastError() ); \
+    cudaSafeCall( cudaDeviceSynchronize() ); \
 } \
 }; \
 template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
@@ -230,8 +230,8 @@ namespace cv { namespace gpu { namespace cudev
     Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
     warp<Transform><<<grid, block>>>(filter_src, dst); \
 } \
-    cvCudaSafeCall( cudaGetLastError() ); \
-    cvCudaSafeCall( cudaDeviceSynchronize() ); \
+    cudaSafeCall( cudaGetLastError() ); \
+    cudaSafeCall( cudaDeviceSynchronize() ); \
 } \
 };
@@ -310,7 +310,7 @@ namespace cv { namespace gpu { namespace cudev
 template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
     int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
     warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
 }
@@ -348,7 +348,7 @@ namespace cv { namespace gpu { namespace cudev
 template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
     int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
     warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
 }
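The warp hunks above all follow one launch discipline: check the launch configuration with cudaGetLastError() immediately after the <<<...>>> call, and call cudaDeviceSynchronize() only when the caller passed the default stream, so asynchronous callers are never stalled. A minimal self-contained sketch of that pattern, assuming the CUDA_SAFE_CALL_SKETCH helper from the earlier sketch; the kernel and function names here are illustrative, not OpenCV's:

    #include <cuda_runtime.h>

    __global__ void scaleKernelSketch(const float* src, float* dst, int n, float a)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = a * src[i];
    }

    void launchCheckedSketch(const float* src, float* dst, int n, float a, cudaStream_t stream)
    {
        dim3 block(256);
        dim3 grid((n + block.x - 1) / block.x);   // the same divUp rounding used above

        scaleKernelSketch<<<grid, block, 0, stream>>>(src, dst, n, a);
        CUDA_SAFE_CALL_SKETCH( cudaGetLastError() );          // catches bad launch configs
        if (stream == 0)
            CUDA_SAFE_CALL_SKETCH( cudaDeviceSynchronize() ); // default stream: block and surface errors
    }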

@@ -150,7 +150,7 @@ namespace
     nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int DEPTH, typename NppArithmScalarFunc<DEPTH, 1>::func_ptr func> struct NppArithmScalar<DEPTH, 1, func>
@@ -168,7 +168,7 @@ namespace
     nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int DEPTH, typename NppArithmScalarFunc<DEPTH, 2>::func_ptr func> struct NppArithmScalar<DEPTH, 2, func>
@@ -192,7 +192,7 @@ namespace
     (npp_complex_type*)dst.data, static_cast<int>(dst.step), sz, 0) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int cn, typename NppArithmScalarFunc<CV_32F, cn>::func_ptr func> struct NppArithmScalar<CV_32F, cn, func>
@@ -212,7 +212,7 @@ namespace
     nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<typename NppArithmScalarFunc<CV_32F, 1>::func_ptr func> struct NppArithmScalar<CV_32F, 1, func>
@@ -230,7 +230,7 @@ namespace
     nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<Npp32f>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<typename NppArithmScalarFunc<CV_32F, 2>::func_ptr func> struct NppArithmScalar<CV_32F, 2, func>
@@ -253,7 +253,7 @@ namespace
     nppSafeCall( func((const npp_complex_type*)src.data, static_cast<int>(src.step), nConstant, (npp_complex_type*)dst.data, static_cast<int>(dst.step), sz) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
@@ -2218,7 +2218,7 @@ namespace
     nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template <int DEPTH, typename NppBitwiseCFunc<DEPTH, 1>::func_t func> struct NppBitwiseC<DEPTH, 1, func>
@@ -2236,7 +2236,7 @@ namespace
     nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
@@ -2349,7 +2349,7 @@ namespace
     nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template <int DEPTH, typename NppShiftFunc<DEPTH, 1>::func_t func> struct NppShift<DEPTH, 1, func>
@@ -2367,7 +2367,7 @@ namespace
     nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val[0], dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
@@ -2708,7 +2708,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 else
 {
@@ -2805,7 +2805,7 @@ namespace
     dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
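The element_operations hunks mix two error domains: the NPP primitives return an NppStatus (checked by nppSafeCall), while the trailing synchronization returns a cudaError_t (checked by cudaSafeCall). Below is a hedged sketch of what an NppStatus check can look like; the NPP_SAFE_CALL_SKETCH name and exit-on-error policy are assumptions, not OpenCV's real nppSafeCall:

    #include <npp.h>
    #include <cstdio>
    #include <cstdlib>

    // Illustrative stand-in for nppSafeCall: NPP reports through NppStatus
    // rather than cudaError_t, so it needs its own checking path.
    #define NPP_SAFE_CALL_SKETCH(expr)                                      \
        do {                                                                \
            NppStatus status__ = (expr);                                    \
            if (status__ != NPP_SUCCESS) {                                  \
                fprintf(stderr, "NPP error %d at %s:%d\n",                  \
                        (int)status__, __FILE__, __LINE__);                 \
                exit(EXIT_FAILURE);                                         \
            }                                                               \
        } while (0)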

@@ -288,7 +288,7 @@ namespace
     dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
@@ -322,7 +322,7 @@ namespace
     dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
@@ -368,7 +368,7 @@ namespace
     dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 nppFilterBox_t func;
@@ -436,7 +436,7 @@ namespace
     dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 GpuMat kernel;
@@ -702,7 +702,7 @@ namespace
     kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 GpuMat kernel;
@@ -865,7 +865,7 @@ namespace
     kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 GpuMat kernel;
@@ -967,7 +967,7 @@ namespace
     kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 GpuMat kernel;
@@ -1307,7 +1307,7 @@ namespace
     nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 nppFilterRank_t func;
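The filtering hunks all share one body: a stored NPP function pointer (nppFilterBox_t, nppFilterRank_t, ...) is invoked, then the default stream is synchronized. A hedged sketch of that dispatch style follows, assuming the *_SKETCH helpers from the earlier sketches; the pointer typedef is illustrative (its shape matches the 8-bit C1R box/rank filter family, but it is not an OpenCV typedef):

    #include <cuda_runtime.h>
    #include <npp.h>

    // Illustrative NPP entry-point type; chosen once per format, reused per call.
    typedef NppStatus (*filter8uFuncSketch)(const Npp8u* pSrc, Npp32s nSrcStep,
                                            Npp8u* pDst, Npp32s nDstStep,
                                            NppiSize oSizeROI, NppiSize oMaskSize,
                                            NppiPoint oAnchor);

    struct FilterInvokerSketch
    {
        filter8uFuncSketch func;

        void operator()(const Npp8u* src, int srcStep, Npp8u* dst, int dstStep,
                        NppiSize roi, NppiSize ksize, NppiPoint anchor,
                        cudaStream_t stream) const
        {
            // Check the NPP status, then synchronize only on the default stream.
            NPP_SAFE_CALL_SKETCH( func(src, srcStep, dst, dstStep, roi, ksize, anchor) );
            if (stream == 0)
                CUDA_SAFE_CALL_SKETCH( cudaDeviceSynchronize() );
        }
    };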

@@ -198,7 +198,7 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
 #endif
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
@@ -276,7 +276,7 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
 #endif
     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 #endif /* !defined (HAVE_CUDA) */

@@ -278,7 +278,7 @@ void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf&
 ushort2* oldBuf = oldBuf_;
 ushort2* newBuf = newBuf_;
-cvCudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
 const int cellSize = cvRound(minDist);
 const int gridWidth = (src.cols + cellSize - 1) / cellSize;
@@ -338,7 +338,7 @@ void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf&
     }
 }
-cvCudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
+cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
 centersCount = newCount;
 }
@@ -603,8 +603,8 @@ namespace
 oldPosBuf.resize(posCount);
 oldVoteBuf.resize(posCount);
-cvCudaSafeCall( cudaMemcpy(&oldPosBuf[0], outBuf.ptr(0), posCount * sizeof(float4), cudaMemcpyDeviceToHost) );
-cvCudaSafeCall( cudaMemcpy(&oldVoteBuf[0], outBuf.ptr(1), posCount * sizeof(int3), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&oldPosBuf[0], outBuf.ptr(0), posCount * sizeof(float4), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&oldVoteBuf[0], outBuf.ptr(1), posCount * sizeof(int3), cudaMemcpyDeviceToHost) );
 indexies.resize(posCount);
 for (int i = 0; i < posCount; ++i)
@@ -677,8 +677,8 @@ namespace
 }
 posCount = static_cast<int>(newPosBuf.size());
-cvCudaSafeCall( cudaMemcpy(outBuf.ptr(0), &newPosBuf[0], posCount * sizeof(float4), cudaMemcpyHostToDevice) );
-cvCudaSafeCall( cudaMemcpy(outBuf.ptr(1), &newVoteBuf[0], posCount * sizeof(int3), cudaMemcpyHostToDevice) );
+cudaSafeCall( cudaMemcpy(outBuf.ptr(0), &newPosBuf[0], posCount * sizeof(float4), cudaMemcpyHostToDevice) );
+cudaSafeCall( cudaMemcpy(outBuf.ptr(1), &newVoteBuf[0], posCount * sizeof(int3), cudaMemcpyHostToDevice) );
 }
 void GHT_Pos::convertTo(GpuMat& positions)
@@ -1153,7 +1153,7 @@ namespace
     true, templCenter);
 h_buf.resize(templFeatures.sizes.cols);
-cvCudaSafeCall( cudaMemcpy(&h_buf[0], templFeatures.sizes.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&h_buf[0], templFeatures.sizes.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
 templFeatures.maxSize = *max_element(h_buf.begin(), h_buf.end());
 }
@@ -1279,7 +1279,7 @@ namespace
 hist.setTo(Scalar::all(0));
 GHT_Guil_Full_calcOHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
     hist.ptr<int>(), (float)minAngle, (float)maxAngle, (float)angleStep, angleRange, levels, templFeatures.maxSize);
-cvCudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
 angles.clear();
@@ -1303,7 +1303,7 @@ namespace
 hist.setTo(Scalar::all(0));
 GHT_Guil_Full_calcSHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
     hist.ptr<int>(), (float)angle, (float)angleEpsilon, (float)minScale, (float)maxScale, (float)iScaleStep, scaleRange, levels, templFeatures.maxSize);
-cvCudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
 scales.clear();
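The HoughCircles and generalized-Hough hunks above repeatedly move candidate buffers across the bus: download with cudaMemcpy(... cudaMemcpyDeviceToHost), filter on the CPU (for example, grid-based minimum-distance suppression), then upload the compacted list. A minimal sketch of that round trip, assuming CUDA_SAFE_CALL_SKETCH from earlier; the buffer name and the elided filtering step are illustrative:

    #include <cuda_runtime.h>
    #include <vector>

    // Download device candidates, compact on the host, upload the survivors.
    int compactOnHostSketch(float4* d_items, int count)
    {
        std::vector<float4> h_items(count);
        CUDA_SAFE_CALL_SKETCH( cudaMemcpy(&h_items[0], d_items,
                                          count * sizeof(float4),
                                          cudaMemcpyDeviceToHost) );

        // ... host-side filtering would shrink h_items here ...

        int newCount = static_cast<int>(h_items.size());
        CUDA_SAFE_CALL_SKETCH( cudaMemcpy(d_items, &h_items[0],
                                          newCount * sizeof(float4),
                                          cudaMemcpyHostToDevice) );
        return newCount;
    }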

@@ -320,7 +320,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
 }
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 else
 {
@@ -494,7 +494,7 @@ namespace
     dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
@@ -581,7 +581,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
 roiSize.height = src.rows;
 cudaDeviceProp prop;
-cvCudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
 Ncv32u bufSize;
 ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
@@ -594,7 +594,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
     sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 }
@@ -610,7 +610,7 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
 roiSize.height = src.rows;
 cudaDeviceProp prop;
-cvCudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
 Ncv32u bufSize;
 ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
@@ -625,7 +625,7 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
     sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 //////////////////////////////////////////////////////////////////////////////
@@ -674,7 +674,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
     dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
@@ -726,7 +726,7 @@ namespace
     lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
@@ -758,7 +758,7 @@ namespace
 nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
@@ -826,7 +826,7 @@ namespace
 nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
@@ -866,7 +866,7 @@ namespace
 nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }
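integralBuffered and sqrIntegral above use a two-phase pattern: query the device properties, ask the NCV staging API how much scratch memory the operation needs (nppiStIntegralGetSize_8u32u / nppiStSqrIntegralGetSize_8u64u), then allocate the buffer once and pass it in. A generic sketch of that shape, assuming CUDA_SAFE_CALL_SKETCH from earlier; the hard-coded size stands in for the real query, whose exact signature is shown in the hunks above:

    #include <cuda_runtime.h>

    void runWithScratchSketch(int device)
    {
        cudaDeviceProp prop;
        CUDA_SAFE_CALL_SKETCH( cudaGetDeviceProperties(&prop, device) );

        // Stand-in for a real size query such as
        // nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop).
        size_t bufSize = 1024;

        unsigned char* d_scratch = 0;
        CUDA_SAFE_CALL_SKETCH( cudaMalloc(&d_scratch, bufSize) );
        // ... run the operation, handing it d_scratch and bufSize ...
        CUDA_SAFE_CALL_SKETCH( cudaFree(d_scratch) );
    }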

@@ -80,11 +80,11 @@ namespace
 public:
     explicit DeviceBuffer(int count_ = 1) : count(count_)
     {
-        cvCudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
+        cudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
     }
     ~DeviceBuffer()
     {
-        cvCudaSafeCall( cudaFree(pdev) );
+        cudaSafeCall( cudaFree(pdev) );
     }
     operator double*() {return pdev;}
@@ -92,13 +92,13 @@ namespace
 void download(double* hptr)
 {
     double hbuf;
-    cvCudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
     *hptr = hbuf;
 }
 void download(double** hptrs)
 {
     AutoBuffer<double, 2 * sizeof(double)> hbuf(count);
-    cvCudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
+    cudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
     for (int i = 0; i < count; ++i)
         *hptrs[i] = hbuf[i];
 }
@@ -143,7 +143,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat
 nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 double* ptrs[2] = {mean.val, stddev.val};
 dbuf.download(ptrs);
@@ -205,7 +205,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
 nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaDeviceSynchronize() );
 dbuf.download(&retVal);
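The DeviceBuffer helper above ties a cudaMalloc'd scratch region to C++ scope, so the matching cudaFree runs even if an intervening safe call throws. A reduced sketch of the same idea, assuming CUDA_SAFE_CALL_SKETCH from earlier; one deliberate variant is flagged in the comment (the destructor ignores the cudaFree status so it cannot throw during stack unwinding, whereas the diff's version checks it):

    #include <cuda_runtime.h>

    class ScopedDeviceDoublesSketch
    {
    public:
        explicit ScopedDeviceDoublesSketch(int count) : pdev_(0), count_(count)
        {
            CUDA_SAFE_CALL_SKETCH( cudaMalloc(&pdev_, count_ * sizeof(double)) );
        }
        ~ScopedDeviceDoublesSketch()
        {
            cudaFree(pdev_);   // intentionally unchecked: destructors must not throw
        }

        operator double*() { return pdev_; }   // same implicit-pointer trick as above

        void download(double* host) const
        {
            CUDA_SAFE_CALL_SKETCH( cudaMemcpy(host, pdev_, count_ * sizeof(double),
                                              cudaMemcpyDeviceToHost) );
        }

    private:
        double* pdev_;
        int     count_;
    };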

@@ -216,10 +216,10 @@ namespace cv { namespace gpu { namespace cudev
 kernelDownsampleX2<<<gDim, bDim, 0, stream>>>((T*)src.data, static_cast<Ncv32u>(src.step),
     (T*)dst.data, static_cast<Ncv32u>(dst.step), NcvSize32u(dst.cols, dst.rows));
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 template void kernelDownsampleX2_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@@ -291,10 +291,10 @@ namespace cv { namespace gpu { namespace cudev
 kernelInterpolateFrom1<<<gDim, bDim, 0, stream>>>((T*) src.data, static_cast<Ncv32u>(src.step), NcvSize32u(src.cols, src.rows),
     (T*) dst.data, static_cast<Ncv32u>(dst.step), NcvSize32u(dst.cols, dst.rows));
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 template void kernelInterpolateFrom1_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

@@ -184,9 +184,9 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo
 ensureSizeIsEnough(1, ssCount, CV_16SC2, buf);
 if (stream == 0)
-    cvCudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) );
+    cudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) );
 else
-    cvCudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) );
+    cudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) );
 const int maxX = prev.cols - blockSize.width;
 const int maxY = prev.rows - blockSize.height;

@@ -82,7 +82,7 @@ void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& f
 v.create(frame0.size(), CV_32FC1);
 cudaDeviceProp devProp;
-cvCudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
+cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
 NCVBroxOpticalFlowDescriptor desc;
@@ -185,7 +185,7 @@ void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, cons
 ncvSafeCall( nppiStInterpolateFrames(&state) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 namespace cv { namespace gpu { namespace cudev

@@ -133,7 +133,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
     dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 else
 {

@@ -170,7 +170,7 @@ namespace
     coeffs, npp_inter[interpolation]) );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 };
 }

@@ -106,19 +106,19 @@ namespace cv { namespace gpu { namespace cudev
 void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_max_candidates, &maxCandidates, sizeof(maxCandidates)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_max_features, &maxFeatures, sizeof(maxFeatures)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_img_rows, &img_rows, sizeof(img_rows)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_img_cols, &img_cols, sizeof(img_cols)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_nOctaveLayers, &nOctaveLayers, sizeof(nOctaveLayers)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_hessianThreshold, &hessianThreshold, sizeof(hessianThreshold)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_max_candidates, &maxCandidates, sizeof(maxCandidates)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_max_features, &maxFeatures, sizeof(maxFeatures)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_img_rows, &img_rows, sizeof(img_rows)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_img_cols, &img_cols, sizeof(img_cols)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_nOctaveLayers, &nOctaveLayers, sizeof(nOctaveLayers)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_hessianThreshold, &hessianThreshold, sizeof(hessianThreshold)) );
 }
 void loadOctaveConstants(int octave, int layer_rows, int layer_cols)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_layer_rows, &layer_rows, sizeof(layer_rows)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_layer_cols, &layer_cols, sizeof(layer_cols)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_layer_rows, &layer_rows, sizeof(layer_rows)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_layer_cols, &layer_cols, sizeof(layer_cols)) );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -137,14 +137,14 @@ namespace cv { namespace gpu { namespace cudev
 {
     size_t offset;
     cudaChannelFormatDesc desc_sum = cudaCreateChannelDesc<uint>();
-    cvCudaSafeCall( cudaBindTexture2D(&offset, sumTex, sum.data, desc_sum, sum.cols, sum.rows, sum.step));
+    cudaSafeCall( cudaBindTexture2D(&offset, sumTex, sum.data, desc_sum, sum.cols, sum.rows, sum.step));
     return offset / sizeof(uint);
 }
 size_t bindMaskSumTex(PtrStepSz<uint> maskSum)
 {
     size_t offset;
     cudaChannelFormatDesc desc_sum = cudaCreateChannelDesc<uint>();
-    cvCudaSafeCall( cudaBindTexture2D(&offset, maskSumTex, maskSum.data, desc_sum, maskSum.cols, maskSum.rows, maskSum.step));
+    cudaSafeCall( cudaBindTexture2D(&offset, maskSumTex, maskSum.data, desc_sum, maskSum.cols, maskSum.rows, maskSum.step));
     return offset / sizeof(uint);
 }
@@ -245,9 +245,9 @@ namespace cv { namespace gpu { namespace cudev
 grid.y = divUp(max_samples_i, threads.y) * (nOctaveLayers + 2);
 icvCalcLayerDetAndTrace<<<grid, threads>>>(det, trace);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -395,9 +395,9 @@ namespace cv { namespace gpu { namespace cudev
 else
     icvFindMaximaInLayer<WithOutMask><<<grid, threads, smem_size>>>(det, trace, maxPosBuffer, maxCounter);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -520,9 +520,9 @@ namespace cv { namespace gpu { namespace cudev
 grid.x = maxCounter;
 icvInterpolateKeypoint<<<grid, threads>>>(det, maxPosBuffer, featureX, featureY, featureLaplacian, featureOctave, featureSize, featureHessian, featureCounter);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -686,9 +686,9 @@ namespace cv { namespace gpu { namespace cudev
 grid.x = nFeatures;
 icvCalcOrientation<<<grid, threads>>>(featureX, featureY, featureSize, featureDir);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 ////////////////////////////////////////////////////////////////////////
@@ -932,26 +932,26 @@ namespace cv { namespace gpu { namespace cudev
 if (descriptors.cols == 64)
 {
     compute_descriptors_64<<<nFeatures, dim3(32, 16)>>>(descriptors, featureX, featureY, featureSize, featureDir);
-    cvCudaSafeCall( cudaGetLastError() );
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaGetLastError() );
+    cudaSafeCall( cudaDeviceSynchronize() );
     normalize_descriptors<64><<<nFeatures, 64>>>((PtrStepSzf) descriptors);
-    cvCudaSafeCall( cudaGetLastError() );
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaGetLastError() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 else
 {
     compute_descriptors_128<<<nFeatures, dim3(32, 16)>>>(descriptors, featureX, featureY, featureSize, featureDir);
-    cvCudaSafeCall( cudaGetLastError() );
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaGetLastError() );
+    cudaSafeCall( cudaDeviceSynchronize() );
     normalize_descriptors<128><<<nFeatures, 128>>>((PtrStepSzf) descriptors);
-    cvCudaSafeCall( cudaGetLastError() );
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaGetLastError() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 }
 } // namespace surf
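loadGlobalConstants and loadOctaveConstants above push per-run scalars into __constant__ memory with cudaMemcpyToSymbol, so every thread reads them through the constant cache instead of receiving them as kernel arguments. An illustrative, self-contained version with stand-in symbol names (again assuming CUDA_SAFE_CALL_SKETCH from the first sketch):

    #include <cuda_runtime.h>

    __constant__ int   c_rows_sketch;
    __constant__ int   c_cols_sketch;
    __constant__ float c_threshold_sketch;

    // Illustrative counterpart of loadOctaveConstants: one small H2D copy per
    // scalar, after which kernels read the values through the constant cache.
    void loadConstantsSketch(int rows, int cols, float threshold)
    {
        CUDA_SAFE_CALL_SKETCH( cudaMemcpyToSymbol(c_rows_sketch, &rows, sizeof(rows)) );
        CUDA_SAFE_CALL_SKETCH( cudaMemcpyToSymbol(c_cols_sketch, &cols, sizeof(cols)) );
        CUDA_SAFE_CALL_SKETCH( cudaMemcpyToSymbol(c_threshold_sketch, &threshold, sizeof(threshold)) );
    }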

@@ -69,10 +69,10 @@ namespace cv { namespace gpu { namespace cudev
 void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) );
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) );
 }
 __device__ __forceinline__ uint nextRand(uint& state)
@@ -137,13 +137,13 @@ namespace cv { namespace gpu { namespace cudev
 dim3 block(32, 8);
 dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
 init<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, (PtrStepSz<SampleT>) samples, randStates);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
@@ -246,13 +246,13 @@ namespace cv { namespace gpu { namespace cudev
 dim3 block(32, 8);
 dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
-cvCudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
+cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
 update<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, (PtrStepSz<SampleT>) samples, randStates);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
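The ViBe hunks add one step before launching: cudaFuncSetCacheConfig(..., cudaFuncCachePreferL1) asks the driver for a larger L1 split for kernels that lean on registers and local memory rather than shared memory. A compact sketch of that launch sequence, assuming CUDA_SAFE_CALL_SKETCH from earlier; the kernel and helper names are illustrative:

    #include <cuda_runtime.h>

    __global__ void heavyKernelSketch(float* data, int n)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            data[i] *= 2.0f;
    }

    void launchWithL1PreferenceSketch(float* d_data, int n, cudaStream_t stream)
    {
        // Hint: this kernel does not use shared memory, so prefer L1 cache.
        CUDA_SAFE_CALL_SKETCH( cudaFuncSetCacheConfig(heavyKernelSketch, cudaFuncCachePreferL1) );

        dim3 block(256), grid((n + 255) / 256);
        heavyKernelSketch<<<grid, block, 0, stream>>>(d_data, n);
        CUDA_SAFE_CALL_SKETCH( cudaGetLastError() );
        if (stream == 0)
            CUDA_SAFE_CALL_SKETCH( cudaDeviceSynchronize() );
    }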

@@ -177,7 +177,7 @@ namespace
     img_rows, img_cols, octave, use_mask, surf_.nOctaveLayers);
 unsigned int maxCounter;
-cvCudaSafeCall( cudaMemcpy(&maxCounter, counters.ptr<unsigned int>() + 1 + octave, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&maxCounter, counters.ptr<unsigned int>() + 1 + octave, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
 maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
 if (maxCounter > 0)
@@ -190,7 +190,7 @@ namespace
     }
 }
 unsigned int featureCounter;
-cvCudaSafeCall( cudaMemcpy(&featureCounter, counters.ptr<unsigned int>(), sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+cudaSafeCall( cudaMemcpy(&featureCounter, counters.ptr<unsigned int>(), sizeof(unsigned int), cudaMemcpyDeviceToHost) );
 featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
 keypoints.cols = featureCounter;

@@ -340,7 +340,7 @@ struct cv::softcascade::SCascade::Fields
 else
     cudaMemset(objects.data, 0, sizeof(Detection));
-cvCudaSafeCall( cudaGetLastError());
+cudaSafeCall( cudaGetLastError());
 cudev::CascadeInvoker<cudev::GK107PolicyX4> invoker
     = cudev::CascadeInvoker<cudev::GK107PolicyX4>(levels, stages, nodes, leaves);

@@ -102,9 +102,9 @@ namespace btv_l1_cudev
     backwardMotionX, bacwardMotionY,
     forwardMapX, forwardMapY,
     backwardMapX, backwardMapY);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 template <typename T>
@@ -128,10 +128,10 @@ namespace btv_l1_cudev
 const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
 upscaleKernel<src_t><<<grid, block, 0, stream>>>((PtrStepSz<src_t>) src, (PtrStepSz<src_t>) dst, scale);
-cvCudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaGetLastError() );
 if (stream == 0)
-    cvCudaSafeCall( cudaDeviceSynchronize() );
+    cudaSafeCall( cudaDeviceSynchronize() );
 }
 template void upscale<1>(const PtrStepSzb src, PtrStepSzb dst, int scale, cudaStream_t stream);
@@ -211,7 +211,7 @@ namespace btv_l1_cudev
 void loadBtvWeights(const float* weights, size_t count)
 {
-    cvCudaSafeCall( cudaMemcpyToSymbol(c_btvRegWeights, weights, count * sizeof(float)) );
+    cudaSafeCall( cudaMemcpyToSymbol(c_btvRegWeights, weights, count * sizeof(float)) );
 }
 template <int cn>
@@ -223,9 +223,9 @@ namespace btv_l1_cudev
 const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
 calcBtvRegularizationKernel<src_t><<<grid, block>>>((PtrStepSz<src_t>) src, (PtrStepSz<src_t>) dst, ksize);
-cvCudaSafeCall( cudaGetLastError() );
-cvCudaSafeCall( cudaDeviceSynchronize() );
+cudaSafeCall( cudaGetLastError() );
+cudaSafeCall( cudaDeviceSynchronize() );
 }
 template void calcBtvRegularization<1>(PtrStepSzb src, PtrStepSzb dst, int ksize);
