fixed build for CARMA platform

pull/258/merge
Vladislav Vinogradov 12 years ago
parent 889674ef43
commit da93a1dab9
  1. 23
      cmake/OpenCVDetectCUDA.cmake
  2. 1
      modules/core/CMakeLists.txt
  3. 26
      modules/core/src/gpumat.cpp
  4. 51
      modules/core/src/opengl_interop.cpp
  5. 2
      modules/gpu/app/nv_perf_test/CMakeLists.txt
  6. 1
      modules/gpu/include/opencv2/gpu/device/common.hpp
  7. 50
      modules/gpu/src/cuda/canny.cu
  8. 1
      modules/gpu/src/cuda/ccomponetns.cu
  9. 335
      modules/gpu/src/cuda/column_filter.h
  10. 136
      modules/gpu/src/cuda/element_operations.cu
  11. 1
      modules/gpu/src/cuda/gftt.cu
  12. 11
      modules/gpu/src/cuda/global_motion.cu
  13. 13
      modules/gpu/src/cuda/hist.cu
  14. 16
      modules/gpu/src/cuda/hog.cu
  15. 2
      modules/gpu/src/cuda/hough.cu
  16. 15
      modules/gpu/src/cuda/matrix_reductions.cu
  17. 5
      modules/gpu/src/cuda/optflowbm.cu
  18. 1
      modules/gpu/src/cuda/orb.cu
  19. 37
      modules/gpu/src/cuda/pyrlk.cu
  20. 335
      modules/gpu/src/cuda/row_filter.h
  21. 36
      modules/gpu/src/cuda/tvl1flow.cu
  22. 2
      modules/gpu/src/imgproc.cpp
  23. 12
      modules/gpu/test/nvidia/TestHaarCascadeApplication.cpp
  24. 8
      samples/gpu/driver_api_multi.cpp
  25. 2
      samples/gpu/driver_api_stereo_multi.cpp
  26. 3
      samples/gpu/softcascade.cpp

@ -3,12 +3,12 @@ if(${CMAKE_VERSION} VERSION_LESS "2.8.3")
return()
endif()
if (WIN32 AND NOT MSVC)
if(WIN32 AND NOT MSVC)
message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler suppoted on your platform).")
return()
endif()
if (CMAKE_COMPILER_IS_GNUCXX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
if(CMAKE_COMPILER_IS_GNUCXX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "CUDA compilation is disabled (due to Clang unsuppoted on your platform).")
return()
endif()
@ -72,11 +72,11 @@ if(CUDA_FOUND)
# Tell NVCC to add PTX intermediate code for the specified architectures
string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
foreach(ARCH IN LISTS ARCH_LIST)
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
endforeach()
foreach(ARCH IN LISTS ARCH_LIST)
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
endforeach()
# These vars will be processed in other scripts
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
@ -84,7 +84,7 @@ if(CUDA_FOUND)
message(STATUS "CUDA NVCC target flags: ${CUDA_NVCC_FLAGS}")
OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF)
OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF)
if(CUDA_FAST_MATH)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math)
@ -92,7 +92,6 @@ if(CUDA_FOUND)
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
unset(CUDA_npp_LIBRARY CACHE)
find_cuda_helper_libs(npp)
macro(ocv_cuda_compile VAR)
@ -106,15 +105,15 @@ if(CUDA_FOUND)
string(REPLACE "-ggdb3" "" ${var} "${${var}}")
endforeach()
if (BUILD_SHARED_LIBS)
if(BUILD_SHARED_LIBS)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -DCVAPI_EXPORTS)
endif()
if(UNIX OR APPLE)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)
endif()
if(APPLE)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
endif()
# disabled because of multiple warnings during building nvcc auto generated files

@ -10,7 +10,6 @@ if(HAVE_CUDA)
file(GLOB lib_cuda "src/cuda/*.cu")
ocv_cuda_compile(cuda_objs ${lib_cuda})
set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
else()
set(lib_cuda "")

@ -45,8 +45,7 @@
#include <iostream>
#ifdef HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>
#include <npp.h>
#define CUDART_MINIMUM_REQUIRED_VERSION 4010
@ -394,18 +393,6 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory)
namespace
{
template <class T> void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
*attribute = T();
//CUresult error = CUDA_SUCCESS;// = cuDeviceGetAttribute( attribute, device_attribute, device ); why link erros under ubuntu??
CUresult error = cuDeviceGetAttribute( attribute, device_attribute, device );
if( CUDA_SUCCESS == error )
return;
printf("Driver API error = %04d\n", error);
cv::gpu::error("driver API error", __FILE__, __LINE__);
}
int convertSMVer2Cores(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
@ -466,17 +453,6 @@ void cv::gpu::printCudaDeviceInfo(int device)
convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
// This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output
int memoryClock, memBusWidth, L2CacheSize;
getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
getCudaAttribute<int>( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev );
getCudaAttribute<int>( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev );
printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f);
printf(" Memory Bus Width: %d-bit\n", memBusWidth);
if (L2CacheSize)
printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);

@ -44,11 +44,13 @@
#include "opencv2/core/opengl_interop.hpp"
#include "opencv2/core/gpumat.hpp"
#include "gl_core_3_1.hpp"
#ifdef HAVE_OPENGL
#include "gl_core_3_1.hpp"
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#endif
#endif
using namespace std;
@ -61,24 +63,24 @@ namespace
void throw_nogl() { CV_Error(CV_OpenGlNotSupported, "The library is compiled without OpenGL support"); }
#else
void throw_nogl() { CV_Error(CV_OpenGlApiCallError, "OpenGL context doesn't exist"); }
#endif
#ifndef HAVE_CUDA
void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
#else
void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#ifndef HAVE_CUDA
void throw_nocuda() { CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support"); }
#else
void throw_nocuda() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#endif
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#endif
void ___cudaSafeCall(cudaError_t err, const char* file, const int line, const char* func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
void ___cudaSafeCall(cudaError_t err, const char* file, const int line, const char* func = "")
{
if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
#endif
#endif
}
@ -139,11 +141,16 @@ namespace
void cv::gpu::setGlDevice(int device)
{
#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
#ifndef HAVE_OPENGL
(void) device;
throw_nocuda();
throw_nogl();
#else
cudaSafeCall( cudaGLSetGLDevice(device) );
#if !defined(HAVE_CUDA) || defined(CUDA_DISABLER)
(void) device;
throw_nocuda();
#else
cudaSafeCall( cudaGLSetGLDevice(device) );
#endif
#endif
}

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.6)
cmake_minimum_required(VERSION 2.8.3)
project(nv_perf_test)

@ -100,7 +100,6 @@ namespace cv { namespace gpu
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef signed char schar;
typedef unsigned int uint;
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{

@ -52,7 +52,7 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace canny
{
struct L1 : binary_function<int, int, float>
{
@ -78,17 +78,17 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <> struct TransformFunctorTraits<L1> : DefaultTransformFunctorTraits<L1>
template <> struct TransformFunctorTraits<canny::L1> : DefaultTransformFunctorTraits<canny::L1>
{
enum { smart_shift = 4 };
};
template <> struct TransformFunctorTraits<L2> : DefaultTransformFunctorTraits<L2>
template <> struct TransformFunctorTraits<canny::L2> : DefaultTransformFunctorTraits<canny::L2>
{
enum { smart_shift = 4 };
};
}}}
namespace
namespace canny
{
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
struct SrcTex
@ -104,7 +104,7 @@ namespace
};
template <class Norm> __global__
void calcMagnitude(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -120,10 +120,7 @@ namespace
mag(y, x) = norm(dxVal, dyVal);
}
}
namespace canny
{
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
{
const dim3 block(16, 16);
@ -135,12 +132,12 @@ namespace canny
if (L2Grad)
{
L2 norm;
::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
}
else
{
L1 norm;
::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
}
cudaSafeCall( cudaGetLastError() );
@ -165,11 +162,11 @@ namespace canny
//////////////////////////////////////////////////////////////////////////////////////////
namespace
namespace canny
{
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void calcMap(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
__global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
{
const int CANNY_SHIFT = 15;
const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
@ -220,10 +217,7 @@ namespace
map(y, x) = edge_type;
}
}
namespace canny
{
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
{
const dim3 block(16, 16);
@ -231,7 +225,7 @@ namespace canny
bindTexture(&tex_mag, mag);
::calcMap<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
@ -240,11 +234,11 @@ namespace canny
//////////////////////////////////////////////////////////////////////////////////////////
namespace
namespace canny
{
__device__ int counter = 0;
__global__ void edgesHysteresisLocal(PtrStepSzi map, ushort2* st)
__global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st)
{
__shared__ volatile int smem[18][18];
@ -325,10 +319,7 @@ namespace
st[ind] = make_ushort2(x, y);
}
}
}
namespace canny
{
void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
{
void* counter_ptr;
@ -339,7 +330,7 @@ namespace canny
const dim3 block(16, 16);
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
::edgesHysteresisLocal<<<grid, block>>>(map, st1);
edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
@ -348,12 +339,12 @@ namespace canny
//////////////////////////////////////////////////////////////////////////////////////////
namespace
namespace canny
{
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
__global__ void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
{
const int stack_size = 512;
@ -439,14 +430,11 @@ namespace
st2[ind + i] = s_st[i];
}
}
}
namespace canny
{
void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, ::counter) );
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
@ -458,7 +446,7 @@ namespace canny
const dim3 block(128);
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
::edgesHysteresisGlobal<<<grid, block>>>(map, st1, st2, count);
edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
@ -472,7 +460,7 @@ namespace canny
//////////////////////////////////////////////////////////////////////////////////////////
namespace
namespace canny
{
struct GetEdges : unary_function<int, uchar>
{
@ -488,7 +476,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <> struct TransformFunctorTraits<GetEdges> : DefaultTransformFunctorTraits<GetEdges>
template <> struct TransformFunctorTraits<canny::GetEdges> : DefaultTransformFunctorTraits<canny::GetEdges>
{
enum { smart_shift = 4 };
};

@ -497,6 +497,7 @@ namespace cv { namespace gpu { namespace device
void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream)
{
(void) flags;
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));

@ -49,20 +49,12 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace column_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
}
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
@ -196,182 +188,185 @@ namespace filter
{
{
0,
::caller< 1, T, D, BrdColReflect101>,
::caller< 2, T, D, BrdColReflect101>,
::caller< 3, T, D, BrdColReflect101>,
::caller< 4, T, D, BrdColReflect101>,
::caller< 5, T, D, BrdColReflect101>,
::caller< 6, T, D, BrdColReflect101>,
::caller< 7, T, D, BrdColReflect101>,
::caller< 8, T, D, BrdColReflect101>,
::caller< 9, T, D, BrdColReflect101>,
::caller<10, T, D, BrdColReflect101>,
::caller<11, T, D, BrdColReflect101>,
::caller<12, T, D, BrdColReflect101>,
::caller<13, T, D, BrdColReflect101>,
::caller<14, T, D, BrdColReflect101>,
::caller<15, T, D, BrdColReflect101>,
::caller<16, T, D, BrdColReflect101>,
::caller<17, T, D, BrdColReflect101>,
::caller<18, T, D, BrdColReflect101>,
::caller<19, T, D, BrdColReflect101>,
::caller<20, T, D, BrdColReflect101>,
::caller<21, T, D, BrdColReflect101>,
::caller<22, T, D, BrdColReflect101>,
::caller<23, T, D, BrdColReflect101>,
::caller<24, T, D, BrdColReflect101>,
::caller<25, T, D, BrdColReflect101>,
::caller<26, T, D, BrdColReflect101>,
::caller<27, T, D, BrdColReflect101>,
::caller<28, T, D, BrdColReflect101>,
::caller<29, T, D, BrdColReflect101>,
::caller<30, T, D, BrdColReflect101>,
::caller<31, T, D, BrdColReflect101>,
::caller<32, T, D, BrdColReflect101>
column_filter::caller< 1, T, D, BrdColReflect101>,
column_filter::caller< 2, T, D, BrdColReflect101>,
column_filter::caller< 3, T, D, BrdColReflect101>,
column_filter::caller< 4, T, D, BrdColReflect101>,
column_filter::caller< 5, T, D, BrdColReflect101>,
column_filter::caller< 6, T, D, BrdColReflect101>,
column_filter::caller< 7, T, D, BrdColReflect101>,
column_filter::caller< 8, T, D, BrdColReflect101>,
column_filter::caller< 9, T, D, BrdColReflect101>,
column_filter::caller<10, T, D, BrdColReflect101>,
column_filter::caller<11, T, D, BrdColReflect101>,
column_filter::caller<12, T, D, BrdColReflect101>,
column_filter::caller<13, T, D, BrdColReflect101>,
column_filter::caller<14, T, D, BrdColReflect101>,
column_filter::caller<15, T, D, BrdColReflect101>,
column_filter::caller<16, T, D, BrdColReflect101>,
column_filter::caller<17, T, D, BrdColReflect101>,
column_filter::caller<18, T, D, BrdColReflect101>,
column_filter::caller<19, T, D, BrdColReflect101>,
column_filter::caller<20, T, D, BrdColReflect101>,
column_filter::caller<21, T, D, BrdColReflect101>,
column_filter::caller<22, T, D, BrdColReflect101>,
column_filter::caller<23, T, D, BrdColReflect101>,
column_filter::caller<24, T, D, BrdColReflect101>,
column_filter::caller<25, T, D, BrdColReflect101>,
column_filter::caller<26, T, D, BrdColReflect101>,
column_filter::caller<27, T, D, BrdColReflect101>,
column_filter::caller<28, T, D, BrdColReflect101>,
column_filter::caller<29, T, D, BrdColReflect101>,
column_filter::caller<30, T, D, BrdColReflect101>,
column_filter::caller<31, T, D, BrdColReflect101>,
column_filter::caller<32, T, D, BrdColReflect101>
},
{
0,
::caller< 1, T, D, BrdColReplicate>,
::caller< 2, T, D, BrdColReplicate>,
::caller< 3, T, D, BrdColReplicate>,
::caller< 4, T, D, BrdColReplicate>,
::caller< 5, T, D, BrdColReplicate>,
::caller< 6, T, D, BrdColReplicate>,
::caller< 7, T, D, BrdColReplicate>,
::caller< 8, T, D, BrdColReplicate>,
::caller< 9, T, D, BrdColReplicate>,
::caller<10, T, D, BrdColReplicate>,
::caller<11, T, D, BrdColReplicate>,
::caller<12, T, D, BrdColReplicate>,
::caller<13, T, D, BrdColReplicate>,
::caller<14, T, D, BrdColReplicate>,
::caller<15, T, D, BrdColReplicate>,
::caller<16, T, D, BrdColReplicate>,
::caller<17, T, D, BrdColReplicate>,
::caller<18, T, D, BrdColReplicate>,
::caller<19, T, D, BrdColReplicate>,
::caller<20, T, D, BrdColReplicate>,
::caller<21, T, D, BrdColReplicate>,
::caller<22, T, D, BrdColReplicate>,
::caller<23, T, D, BrdColReplicate>,
::caller<24, T, D, BrdColReplicate>,
::caller<25, T, D, BrdColReplicate>,
::caller<26, T, D, BrdColReplicate>,
::caller<27, T, D, BrdColReplicate>,
::caller<28, T, D, BrdColReplicate>,
::caller<29, T, D, BrdColReplicate>,
::caller<30, T, D, BrdColReplicate>,
::caller<31, T, D, BrdColReplicate>,
::caller<32, T, D, BrdColReplicate>
column_filter::caller< 1, T, D, BrdColReplicate>,
column_filter::caller< 2, T, D, BrdColReplicate>,
column_filter::caller< 3, T, D, BrdColReplicate>,
column_filter::caller< 4, T, D, BrdColReplicate>,
column_filter::caller< 5, T, D, BrdColReplicate>,
column_filter::caller< 6, T, D, BrdColReplicate>,
column_filter::caller< 7, T, D, BrdColReplicate>,
column_filter::caller< 8, T, D, BrdColReplicate>,
column_filter::caller< 9, T, D, BrdColReplicate>,
column_filter::caller<10, T, D, BrdColReplicate>,
column_filter::caller<11, T, D, BrdColReplicate>,
column_filter::caller<12, T, D, BrdColReplicate>,
column_filter::caller<13, T, D, BrdColReplicate>,
column_filter::caller<14, T, D, BrdColReplicate>,
column_filter::caller<15, T, D, BrdColReplicate>,
column_filter::caller<16, T, D, BrdColReplicate>,
column_filter::caller<17, T, D, BrdColReplicate>,
column_filter::caller<18, T, D, BrdColReplicate>,
column_filter::caller<19, T, D, BrdColReplicate>,
column_filter::caller<20, T, D, BrdColReplicate>,
column_filter::caller<21, T, D, BrdColReplicate>,
column_filter::caller<22, T, D, BrdColReplicate>,
column_filter::caller<23, T, D, BrdColReplicate>,
column_filter::caller<24, T, D, BrdColReplicate>,
column_filter::caller<25, T, D, BrdColReplicate>,
column_filter::caller<26, T, D, BrdColReplicate>,
column_filter::caller<27, T, D, BrdColReplicate>,
column_filter::caller<28, T, D, BrdColReplicate>,
column_filter::caller<29, T, D, BrdColReplicate>,
column_filter::caller<30, T, D, BrdColReplicate>,
column_filter::caller<31, T, D, BrdColReplicate>,
column_filter::caller<32, T, D, BrdColReplicate>
},
{
0,
::caller< 1, T, D, BrdColConstant>,
::caller< 2, T, D, BrdColConstant>,
::caller< 3, T, D, BrdColConstant>,
::caller< 4, T, D, BrdColConstant>,
::caller< 5, T, D, BrdColConstant>,
::caller< 6, T, D, BrdColConstant>,
::caller< 7, T, D, BrdColConstant>,
::caller< 8, T, D, BrdColConstant>,
::caller< 9, T, D, BrdColConstant>,
::caller<10, T, D, BrdColConstant>,
::caller<11, T, D, BrdColConstant>,
::caller<12, T, D, BrdColConstant>,
::caller<13, T, D, BrdColConstant>,
::caller<14, T, D, BrdColConstant>,
::caller<15, T, D, BrdColConstant>,
::caller<16, T, D, BrdColConstant>,
::caller<17, T, D, BrdColConstant>,
::caller<18, T, D, BrdColConstant>,
::caller<19, T, D, BrdColConstant>,
::caller<20, T, D, BrdColConstant>,
::caller<21, T, D, BrdColConstant>,
::caller<22, T, D, BrdColConstant>,
::caller<23, T, D, BrdColConstant>,
::caller<24, T, D, BrdColConstant>,
::caller<25, T, D, BrdColConstant>,
::caller<26, T, D, BrdColConstant>,
::caller<27, T, D, BrdColConstant>,
::caller<28, T, D, BrdColConstant>,
::caller<29, T, D, BrdColConstant>,
::caller<30, T, D, BrdColConstant>,
::caller<31, T, D, BrdColConstant>,
::caller<32, T, D, BrdColConstant>
column_filter::caller< 1, T, D, BrdColConstant>,
column_filter::caller< 2, T, D, BrdColConstant>,
column_filter::caller< 3, T, D, BrdColConstant>,
column_filter::caller< 4, T, D, BrdColConstant>,
column_filter::caller< 5, T, D, BrdColConstant>,
column_filter::caller< 6, T, D, BrdColConstant>,
column_filter::caller< 7, T, D, BrdColConstant>,
column_filter::caller< 8, T, D, BrdColConstant>,
column_filter::caller< 9, T, D, BrdColConstant>,
column_filter::caller<10, T, D, BrdColConstant>,
column_filter::caller<11, T, D, BrdColConstant>,
column_filter::caller<12, T, D, BrdColConstant>,
column_filter::caller<13, T, D, BrdColConstant>,
column_filter::caller<14, T, D, BrdColConstant>,
column_filter::caller<15, T, D, BrdColConstant>,
column_filter::caller<16, T, D, BrdColConstant>,
column_filter::caller<17, T, D, BrdColConstant>,
column_filter::caller<18, T, D, BrdColConstant>,
column_filter::caller<19, T, D, BrdColConstant>,
column_filter::caller<20, T, D, BrdColConstant>,
column_filter::caller<21, T, D, BrdColConstant>,
column_filter::caller<22, T, D, BrdColConstant>,
column_filter::caller<23, T, D, BrdColConstant>,
column_filter::caller<24, T, D, BrdColConstant>,
column_filter::caller<25, T, D, BrdColConstant>,
column_filter::caller<26, T, D, BrdColConstant>,
column_filter::caller<27, T, D, BrdColConstant>,
column_filter::caller<28, T, D, BrdColConstant>,
column_filter::caller<29, T, D, BrdColConstant>,
column_filter::caller<30, T, D, BrdColConstant>,
column_filter::caller<31, T, D, BrdColConstant>,
column_filter::caller<32, T, D, BrdColConstant>
},
{
0,
::caller< 1, T, D, BrdColReflect>,
::caller< 2, T, D, BrdColReflect>,
::caller< 3, T, D, BrdColReflect>,
::caller< 4, T, D, BrdColReflect>,
::caller< 5, T, D, BrdColReflect>,
::caller< 6, T, D, BrdColReflect>,
::caller< 7, T, D, BrdColReflect>,
::caller< 8, T, D, BrdColReflect>,
::caller< 9, T, D, BrdColReflect>,
::caller<10, T, D, BrdColReflect>,
::caller<11, T, D, BrdColReflect>,
::caller<12, T, D, BrdColReflect>,
::caller<13, T, D, BrdColReflect>,
::caller<14, T, D, BrdColReflect>,
::caller<15, T, D, BrdColReflect>,
::caller<16, T, D, BrdColReflect>,
::caller<17, T, D, BrdColReflect>,
::caller<18, T, D, BrdColReflect>,
::caller<19, T, D, BrdColReflect>,
::caller<20, T, D, BrdColReflect>,
::caller<21, T, D, BrdColReflect>,
::caller<22, T, D, BrdColReflect>,
::caller<23, T, D, BrdColReflect>,
::caller<24, T, D, BrdColReflect>,
::caller<25, T, D, BrdColReflect>,
::caller<26, T, D, BrdColReflect>,
::caller<27, T, D, BrdColReflect>,
::caller<28, T, D, BrdColReflect>,
::caller<29, T, D, BrdColReflect>,
::caller<30, T, D, BrdColReflect>,
::caller<31, T, D, BrdColReflect>,
::caller<32, T, D, BrdColReflect>
column_filter::caller< 1, T, D, BrdColReflect>,
column_filter::caller< 2, T, D, BrdColReflect>,
column_filter::caller< 3, T, D, BrdColReflect>,
column_filter::caller< 4, T, D, BrdColReflect>,
column_filter::caller< 5, T, D, BrdColReflect>,
column_filter::caller< 6, T, D, BrdColReflect>,
column_filter::caller< 7, T, D, BrdColReflect>,
column_filter::caller< 8, T, D, BrdColReflect>,
column_filter::caller< 9, T, D, BrdColReflect>,
column_filter::caller<10, T, D, BrdColReflect>,
column_filter::caller<11, T, D, BrdColReflect>,
column_filter::caller<12, T, D, BrdColReflect>,
column_filter::caller<13, T, D, BrdColReflect>,
column_filter::caller<14, T, D, BrdColReflect>,
column_filter::caller<15, T, D, BrdColReflect>,
column_filter::caller<16, T, D, BrdColReflect>,
column_filter::caller<17, T, D, BrdColReflect>,
column_filter::caller<18, T, D, BrdColReflect>,
column_filter::caller<19, T, D, BrdColReflect>,
column_filter::caller<20, T, D, BrdColReflect>,
column_filter::caller<21, T, D, BrdColReflect>,
column_filter::caller<22, T, D, BrdColReflect>,
column_filter::caller<23, T, D, BrdColReflect>,
column_filter::caller<24, T, D, BrdColReflect>,
column_filter::caller<25, T, D, BrdColReflect>,
column_filter::caller<26, T, D, BrdColReflect>,
column_filter::caller<27, T, D, BrdColReflect>,
column_filter::caller<28, T, D, BrdColReflect>,
column_filter::caller<29, T, D, BrdColReflect>,
column_filter::caller<30, T, D, BrdColReflect>,
column_filter::caller<31, T, D, BrdColReflect>,
column_filter::caller<32, T, D, BrdColReflect>
},
{
0,
::caller< 1, T, D, BrdColWrap>,
::caller< 2, T, D, BrdColWrap>,
::caller< 3, T, D, BrdColWrap>,
::caller< 4, T, D, BrdColWrap>,
::caller< 5, T, D, BrdColWrap>,
::caller< 6, T, D, BrdColWrap>,
::caller< 7, T, D, BrdColWrap>,
::caller< 8, T, D, BrdColWrap>,
::caller< 9, T, D, BrdColWrap>,
::caller<10, T, D, BrdColWrap>,
::caller<11, T, D, BrdColWrap>,
::caller<12, T, D, BrdColWrap>,
::caller<13, T, D, BrdColWrap>,
::caller<14, T, D, BrdColWrap>,
::caller<15, T, D, BrdColWrap>,
::caller<16, T, D, BrdColWrap>,
::caller<17, T, D, BrdColWrap>,
::caller<18, T, D, BrdColWrap>,
::caller<19, T, D, BrdColWrap>,
::caller<20, T, D, BrdColWrap>,
::caller<21, T, D, BrdColWrap>,
::caller<22, T, D, BrdColWrap>,
::caller<23, T, D, BrdColWrap>,
::caller<24, T, D, BrdColWrap>,
::caller<25, T, D, BrdColWrap>,
::caller<26, T, D, BrdColWrap>,
::caller<27, T, D, BrdColWrap>,
::caller<28, T, D, BrdColWrap>,
::caller<29, T, D, BrdColWrap>,
::caller<30, T, D, BrdColWrap>,
::caller<31, T, D, BrdColWrap>,
::caller<32, T, D, BrdColWrap>
column_filter::caller< 1, T, D, BrdColWrap>,
column_filter::caller< 2, T, D, BrdColWrap>,
column_filter::caller< 3, T, D, BrdColWrap>,
column_filter::caller< 4, T, D, BrdColWrap>,
column_filter::caller< 5, T, D, BrdColWrap>,
column_filter::caller< 6, T, D, BrdColWrap>,
column_filter::caller< 7, T, D, BrdColWrap>,
column_filter::caller< 8, T, D, BrdColWrap>,
column_filter::caller< 9, T, D, BrdColWrap>,
column_filter::caller<10, T, D, BrdColWrap>,
column_filter::caller<11, T, D, BrdColWrap>,
column_filter::caller<12, T, D, BrdColWrap>,
column_filter::caller<13, T, D, BrdColWrap>,
column_filter::caller<14, T, D, BrdColWrap>,
column_filter::caller<15, T, D, BrdColWrap>,
column_filter::caller<16, T, D, BrdColWrap>,
column_filter::caller<17, T, D, BrdColWrap>,
column_filter::caller<18, T, D, BrdColWrap>,
column_filter::caller<19, T, D, BrdColWrap>,
column_filter::caller<20, T, D, BrdColWrap>,
column_filter::caller<21, T, D, BrdColWrap>,
column_filter::caller<22, T, D, BrdColWrap>,
column_filter::caller<23, T, D, BrdColWrap>,
column_filter::caller<24, T, D, BrdColWrap>,
column_filter::caller<25, T, D, BrdColWrap>,
column_filter::caller<26, T, D, BrdColWrap>,
column_filter::caller<27, T, D, BrdColWrap>,
column_filter::caller<28, T, D, BrdColWrap>,
column_filter::caller<29, T, D, BrdColWrap>,
column_filter::caller<30, T, D, BrdColWrap>,
column_filter::caller<31, T, D, BrdColWrap>,
column_filter::caller<32, T, D, BrdColWrap>
}
};
::loadKernel(kernel, ksize, stream);
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}

@ -52,7 +52,7 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace arithm
{
template <size_t src_size, size_t dst_size> struct ArithmFuncTraits
{
@ -152,7 +152,7 @@ namespace
//////////////////////////////////////////////////////////////////////////
// addMat
namespace
namespace arithm
{
template <typename T, typename D> struct VAdd4;
template <> struct VAdd4<uint, uint> : binary_function<uint, uint, uint>
@ -336,19 +336,19 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D> struct TransformFunctorTraits< VAdd4<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< VAdd2<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAdd2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< AddMat<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::AddMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
@ -446,7 +446,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// addScalar
namespace
namespace arithm
{
template <typename T, typename S, typename D> struct AddScalar : unary_function<T, D>
{
@ -463,7 +463,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< AddScalar<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
@ -541,7 +541,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// subMat
namespace
namespace arithm
{
template <typename T, typename D> struct VSub4;
template <> struct VSub4<uint, uint> : binary_function<uint, uint, uint>
@ -725,19 +725,19 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D> struct TransformFunctorTraits< VSub4<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::VSub4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< VSub2<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::VSub2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< SubMat<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
@ -908,7 +908,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// mulMat
namespace
namespace arithm
{
struct Mul_8uc4_32f : binary_function<uint, float, uint>
{
@ -966,15 +966,15 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <> struct TransformFunctorTraits<Mul_8uc4_32f> : ArithmFuncTraits<sizeof(uint), sizeof(uint)>
template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <typename T, typename D> struct TransformFunctorTraits< Mul<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::Mul<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
template <typename T, typename S, typename D> struct TransformFunctorTraits< MulScale<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
@ -1066,7 +1066,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// mulScalar
namespace
namespace arithm
{
template <typename T, typename S, typename D> struct MulScalar : unary_function<T, D>
{
@ -1083,7 +1083,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< MulScalar<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
@ -1157,7 +1157,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// divMat
namespace
namespace arithm
{
struct Div_8uc4_32f : binary_function<uint, float, uint>
{
@ -1234,15 +1234,15 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <> struct TransformFunctorTraits<Div_8uc4_32f> : ArithmFuncTraits<sizeof(uint), sizeof(uint)>
template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <typename T, typename D> struct TransformFunctorTraits< Div<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
template <typename T, typename S, typename D> struct TransformFunctorTraits< DivScale<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
@ -1403,7 +1403,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// divInv
namespace
namespace arithm
{
template <typename T, typename S, typename D> struct DivInv : unary_function<T, D>
{
@ -1420,7 +1420,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< DivInv<T, S, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivInv<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
@ -1494,7 +1494,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// absDiffMat
namespace
namespace arithm
{
template <typename T, typename D> struct VAbsDiff4;
template <> struct VAbsDiff4<uint, uint> : binary_function<uint, uint, uint>
@ -1611,19 +1611,19 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T, typename D> struct TransformFunctorTraits< VAbsDiff4<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAbsDiff4<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
////////////////////////////////////
template <typename T, typename D> struct TransformFunctorTraits< VAbsDiff2<T, D> > : ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename T, typename D> struct TransformFunctorTraits< arithm::VAbsDiff2<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
////////////////////////////////////
template <typename T> struct TransformFunctorTraits< AbsDiffMat<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::AbsDiffMat<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -1666,7 +1666,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// absDiffScalar
namespace
namespace arithm
{
template <typename T, typename S> struct AbsDiffScalar : unary_function<T, T>
{
@ -1684,7 +1684,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T, typename S> struct TransformFunctorTraits< AbsDiffScalar<T, S> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -1713,7 +1713,7 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< abs_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -1738,7 +1738,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// sqrMat
namespace
namespace arithm
{
template <typename T> struct Sqr : unary_function<T, T>
{
@ -1754,7 +1754,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< Sqr<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -1781,7 +1781,7 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -1808,7 +1808,7 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< log_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -1833,7 +1833,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// expMat
namespace
namespace arithm
{
template <typename T> struct Exp : unary_function<T, T>
{
@ -1850,7 +1850,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< Exp<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -1875,7 +1875,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////////////////
// cmpMat
namespace
namespace arithm
{
template <class Op, typename T>
struct Cmp : binary_function<T, T, uchar>
@ -1890,7 +1890,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <class Op, typename T> struct TransformFunctorTraits< Cmp<Op, T> > : ArithmFuncTraits<sizeof(T), sizeof(uchar)>
template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
{
};
}}}
@ -1957,7 +1957,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////////////////
// cmpScalar
namespace
namespace arithm
{
#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
@ -2020,7 +2020,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <class Op, typename T> struct TransformFunctorTraits< CmpScalar<Op, T, 1> > : ArithmFuncTraits<sizeof(T), sizeof(uchar)>
template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
{
};
}}}
@ -2179,19 +2179,19 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< bit_not<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< bit_and<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< bit_or<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -2252,15 +2252,15 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -2298,7 +2298,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// min
namespace
namespace arithm
{
template <typename T> struct VMin4;
template <> struct VMin4<uint> : binary_function<uint, uint, uint>
@ -2389,23 +2389,23 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< VMin4<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::VMin4<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
////////////////////////////////////
template <typename T> struct TransformFunctorTraits< VMin2<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::VMin2<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
////////////////////////////////////
template <typename T> struct TransformFunctorTraits< minimum<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< minimum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -2458,7 +2458,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// max
namespace
namespace arithm
{
template <typename T> struct VMax4;
template <> struct VMax4<uint> : binary_function<uint, uint, uint>
@ -2549,23 +2549,23 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< VMax4<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::VMax4<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
////////////////////////////////////
template <typename T> struct TransformFunctorTraits< VMax2<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::VMax2<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
////////////////////////////////////
template <typename T> struct TransformFunctorTraits< maximum<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< maximum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -2620,23 +2620,23 @@ namespace arithm
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -2679,7 +2679,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// pow
namespace
namespace arithm
{
template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
{
@ -2734,7 +2734,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T> struct TransformFunctorTraits< PowOp<T> > : ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
@ -2759,7 +2759,7 @@ namespace arithm
//////////////////////////////////////////////////////////////////////////
// addWeighted
namespace
namespace arithm
{
template <typename T> struct UseDouble_
{
@ -2809,14 +2809,14 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< AddWeighted<T1, T2, D> >
template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
{
};
template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : ArithmFuncTraits<src_size, dst_size>
template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : arithm::ArithmFuncTraits<src_size, dst_size>
{
};
template <typename T1, typename T2, typename D> struct TransformFunctorTraits< AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)>
template <typename T1, typename T2, typename D> struct TransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)>
{
};
}}}

@ -47,6 +47,7 @@
#if !defined CUDA_DISABLER
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include "opencv2/gpu/device/common.hpp"

@ -43,12 +43,11 @@
#if !defined CUDA_DISABLER
#include "thrust/device_ptr.h"
#include "thrust/remove.h"
#include "thrust/functional.h"
#include "internal_shared.hpp"
#include <thrust/device_ptr.h>
#include <thrust/remove.h>
#include <thrust/functional.h>
using namespace thrust;
#include "internal_shared.hpp"
namespace cv { namespace gpu { namespace device { namespace globmotion {
@ -64,7 +63,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask)
return thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dpoints0, dpoints1)),
thrust::make_zip_iterator(thrust::make_tuple(dpoints0 + N, dpoints1 + N)),
dmask, thrust::not1(thrust::identity<uchar>()))
- make_zip_iterator(make_tuple(dpoints0, dpoints1));
- thrust::make_zip_iterator(make_tuple(dpoints0, dpoints1));
}

@ -51,9 +51,9 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace hist
{
__global__ void histogram256(const uchar* src, int cols, int rows, size_t step, int* hist)
__global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist)
{
__shared__ int shist[256];
@ -94,16 +94,13 @@ namespace
if (histVal > 0)
::atomicAdd(hist + tid, histVal);
}
}
namespace hist
{
void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.rows, block.y));
::histogram256<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
@ -113,7 +110,7 @@ namespace hist
/////////////////////////////////////////////////////////////////////////
namespace
namespace hist
{
__constant__ int c_lut[256];
@ -133,7 +130,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
template <> struct TransformFunctorTraits<EqualizeHist> : DefaultTransformFunctorTraits<EqualizeHist>
template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
{
enum { smart_shift = 4 };
};

@ -244,15 +244,17 @@ namespace cv { namespace gpu { namespace device
return smem[0];
#endif
}
else
{
#if __CUDA_ARCH__ >= 300
if (threadIdx.x == 0)
smem[0] = sum;
#endif
#if __CUDA_ARCH__ >= 300
if (threadIdx.x == 0)
smem[0] = sum;
#endif
__syncthreads();
__syncthreads();
return smem[0];
return smem[0];
}
}

@ -42,7 +42,9 @@
#if !defined CUDA_DISABLER
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/emulation.hpp"
#include "opencv2/gpu/device/vec_math.hpp"

@ -55,7 +55,7 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace detail
{
template <int cn> struct Unroll;
template <> struct Unroll<1>
@ -218,7 +218,7 @@ namespace sum
{
sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<R>()));
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
if (tid == 0)
{
@ -254,7 +254,7 @@ namespace sum
{
sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<double>()));
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<double>()));
if (tid == 0)
{
@ -294,7 +294,7 @@ namespace sum
}
}
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(sum), tid, Unroll<cn>::op(plus<R>()));
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
}
@ -918,13 +918,11 @@ namespace countNonZero
__global__ void kernel(const PtrStepSz<T> src, unsigned int* count, const int twidth, const int theight)
{
__shared__ unsigned int scount[BLOCK_SIZE];
__shared__ bool is_last;
const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
unsigned int mycount = 0;
@ -946,6 +944,9 @@ namespace countNonZero
if (tid == 0)
::atomicAdd(count, mycount);
#else
__shared__ bool is_last;
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
if (tid == 0)
{
count[bid] = mycount;
@ -1244,7 +1245,7 @@ namespace reduce
for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE)
myVal = op(myVal, saturate_cast<work_type>(srcRow[x]));
device::reduce<BLOCK_SIZE>(Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), Unroll<cn>::tie(myVal), threadIdx.x, Unroll<cn>::op(op));
device::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));
if (threadIdx.x == 0)
dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols));

@ -48,7 +48,7 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace optflowbm
{
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);
@ -145,10 +145,7 @@ namespace
velx(i, j) = static_cast<float>(sumx) / countMin;
vely(i, j) = static_cast<float>(sumy) / countMin;
}
}
namespace optflowbm
{
void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream)
{

@ -47,6 +47,7 @@
#if !defined CUDA_DISABLER
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include "opencv2/gpu/device/common.hpp"

@ -57,7 +57,7 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace pyrlk
{
__constant__ int c_winSize_x;
__constant__ int c_winSize_y;
@ -123,7 +123,7 @@ namespace
}
template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
__global__ void sparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
__global__ void sparseKernel(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
{
#if __CUDA_ARCH__ <= 110
const int BLOCK_SIZE = 128;
@ -321,9 +321,9 @@ namespace
dim3 grid(ptcount);
if (level == 0 && err)
sparse<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
sparseKernel<cn, PATCH_X, PATCH_Y, true><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
else
sparse<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
sparseKernel<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);
cudaSafeCall( cudaGetLastError() );
@ -332,7 +332,7 @@ namespace
}
template <bool calcErr>
__global__ void dense(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
__global__ void denseKernel(PtrStepf u, PtrStepf v, const PtrStepf prevU, const PtrStepf prevV, PtrStepf err, const int rows, const int cols)
{
extern __shared__ int smem[];
@ -476,10 +476,7 @@ namespace
err(y, x) = static_cast<float>(errval) / (c_winSize_x * c_winSize_y);
}
}
}
namespace pyrlk
{
void loadConstants(int2 winSize, int iters)
{
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
@ -500,11 +497,11 @@ namespace pyrlk
static const func_t funcs[5][5] =
{
{::sparse_caller<1, 1, 1>, ::sparse_caller<1, 2, 1>, ::sparse_caller<1, 3, 1>, ::sparse_caller<1, 4, 1>, ::sparse_caller<1, 5, 1>},
{::sparse_caller<1, 1, 2>, ::sparse_caller<1, 2, 2>, ::sparse_caller<1, 3, 2>, ::sparse_caller<1, 4, 2>, ::sparse_caller<1, 5, 2>},
{::sparse_caller<1, 1, 3>, ::sparse_caller<1, 2, 3>, ::sparse_caller<1, 3, 3>, ::sparse_caller<1, 4, 3>, ::sparse_caller<1, 5, 3>},
{::sparse_caller<1, 1, 4>, ::sparse_caller<1, 2, 4>, ::sparse_caller<1, 3, 4>, ::sparse_caller<1, 4, 4>, ::sparse_caller<1, 5, 4>},
{::sparse_caller<1, 1, 5>, ::sparse_caller<1, 2, 5>, ::sparse_caller<1, 3, 5>, ::sparse_caller<1, 4, 5>, ::sparse_caller<1, 5, 5>}
{sparse_caller<1, 1, 1>, sparse_caller<1, 2, 1>, sparse_caller<1, 3, 1>, sparse_caller<1, 4, 1>, sparse_caller<1, 5, 1>},
{sparse_caller<1, 1, 2>, sparse_caller<1, 2, 2>, sparse_caller<1, 3, 2>, sparse_caller<1, 4, 2>, sparse_caller<1, 5, 2>},
{sparse_caller<1, 1, 3>, sparse_caller<1, 2, 3>, sparse_caller<1, 3, 3>, sparse_caller<1, 4, 3>, sparse_caller<1, 5, 3>},
{sparse_caller<1, 1, 4>, sparse_caller<1, 2, 4>, sparse_caller<1, 3, 4>, sparse_caller<1, 4, 4>, sparse_caller<1, 5, 4>},
{sparse_caller<1, 1, 5>, sparse_caller<1, 2, 5>, sparse_caller<1, 3, 5>, sparse_caller<1, 4, 5>, sparse_caller<1, 5, 5>}
};
bindTexture(&tex_If, I);
@ -522,11 +519,11 @@ namespace pyrlk
static const func_t funcs[5][5] =
{
{::sparse_caller<4, 1, 1>, ::sparse_caller<4, 2, 1>, ::sparse_caller<4, 3, 1>, ::sparse_caller<4, 4, 1>, ::sparse_caller<4, 5, 1>},
{::sparse_caller<4, 1, 2>, ::sparse_caller<4, 2, 2>, ::sparse_caller<4, 3, 2>, ::sparse_caller<4, 4, 2>, ::sparse_caller<4, 5, 2>},
{::sparse_caller<4, 1, 3>, ::sparse_caller<4, 2, 3>, ::sparse_caller<4, 3, 3>, ::sparse_caller<4, 4, 3>, ::sparse_caller<4, 5, 3>},
{::sparse_caller<4, 1, 4>, ::sparse_caller<4, 2, 4>, ::sparse_caller<4, 3, 4>, ::sparse_caller<4, 4, 4>, ::sparse_caller<4, 5, 4>},
{::sparse_caller<4, 1, 5>, ::sparse_caller<4, 2, 5>, ::sparse_caller<4, 3, 5>, ::sparse_caller<4, 4, 5>, ::sparse_caller<4, 5, 5>}
{sparse_caller<4, 1, 1>, sparse_caller<4, 2, 1>, sparse_caller<4, 3, 1>, sparse_caller<4, 4, 1>, sparse_caller<4, 5, 1>},
{sparse_caller<4, 1, 2>, sparse_caller<4, 2, 2>, sparse_caller<4, 3, 2>, sparse_caller<4, 4, 2>, sparse_caller<4, 5, 2>},
{sparse_caller<4, 1, 3>, sparse_caller<4, 2, 3>, sparse_caller<4, 3, 3>, sparse_caller<4, 4, 3>, sparse_caller<4, 5, 3>},
{sparse_caller<4, 1, 4>, sparse_caller<4, 2, 4>, sparse_caller<4, 3, 4>, sparse_caller<4, 4, 4>, sparse_caller<4, 5, 4>},
{sparse_caller<4, 1, 5>, sparse_caller<4, 2, 5>, sparse_caller<4, 3, 5>, sparse_caller<4, 4, 5>, sparse_caller<4, 5, 5>}
};
bindTexture(&tex_If4, I);
@ -551,12 +548,12 @@ namespace pyrlk
if (err.data)
{
::dense<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
denseKernel<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
cudaSafeCall( cudaGetLastError() );
}
else
{
::dense<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
denseKernel<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
cudaSafeCall( cudaGetLastError() );
}

@ -49,20 +49,12 @@
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace
namespace row_filter
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float* kernel, int ksize, cudaStream_t stream)
{
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
}
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
@ -195,182 +187,185 @@ namespace filter
{
{
0,
::caller< 1, T, D, BrdRowReflect101>,
::caller< 2, T, D, BrdRowReflect101>,
::caller< 3, T, D, BrdRowReflect101>,
::caller< 4, T, D, BrdRowReflect101>,
::caller< 5, T, D, BrdRowReflect101>,
::caller< 6, T, D, BrdRowReflect101>,
::caller< 7, T, D, BrdRowReflect101>,
::caller< 8, T, D, BrdRowReflect101>,
::caller< 9, T, D, BrdRowReflect101>,
::caller<10, T, D, BrdRowReflect101>,
::caller<11, T, D, BrdRowReflect101>,
::caller<12, T, D, BrdRowReflect101>,
::caller<13, T, D, BrdRowReflect101>,
::caller<14, T, D, BrdRowReflect101>,
::caller<15, T, D, BrdRowReflect101>,
::caller<16, T, D, BrdRowReflect101>,
::caller<17, T, D, BrdRowReflect101>,
::caller<18, T, D, BrdRowReflect101>,
::caller<19, T, D, BrdRowReflect101>,
::caller<20, T, D, BrdRowReflect101>,
::caller<21, T, D, BrdRowReflect101>,
::caller<22, T, D, BrdRowReflect101>,
::caller<23, T, D, BrdRowReflect101>,
::caller<24, T, D, BrdRowReflect101>,
::caller<25, T, D, BrdRowReflect101>,
::caller<26, T, D, BrdRowReflect101>,
::caller<27, T, D, BrdRowReflect101>,
::caller<28, T, D, BrdRowReflect101>,
::caller<29, T, D, BrdRowReflect101>,
::caller<30, T, D, BrdRowReflect101>,
::caller<31, T, D, BrdRowReflect101>,
::caller<32, T, D, BrdRowReflect101>
row_filter::caller< 1, T, D, BrdRowReflect101>,
row_filter::caller< 2, T, D, BrdRowReflect101>,
row_filter::caller< 3, T, D, BrdRowReflect101>,
row_filter::caller< 4, T, D, BrdRowReflect101>,
row_filter::caller< 5, T, D, BrdRowReflect101>,
row_filter::caller< 6, T, D, BrdRowReflect101>,
row_filter::caller< 7, T, D, BrdRowReflect101>,
row_filter::caller< 8, T, D, BrdRowReflect101>,
row_filter::caller< 9, T, D, BrdRowReflect101>,
row_filter::caller<10, T, D, BrdRowReflect101>,
row_filter::caller<11, T, D, BrdRowReflect101>,
row_filter::caller<12, T, D, BrdRowReflect101>,
row_filter::caller<13, T, D, BrdRowReflect101>,
row_filter::caller<14, T, D, BrdRowReflect101>,
row_filter::caller<15, T, D, BrdRowReflect101>,
row_filter::caller<16, T, D, BrdRowReflect101>,
row_filter::caller<17, T, D, BrdRowReflect101>,
row_filter::caller<18, T, D, BrdRowReflect101>,
row_filter::caller<19, T, D, BrdRowReflect101>,
row_filter::caller<20, T, D, BrdRowReflect101>,
row_filter::caller<21, T, D, BrdRowReflect101>,
row_filter::caller<22, T, D, BrdRowReflect101>,
row_filter::caller<23, T, D, BrdRowReflect101>,
row_filter::caller<24, T, D, BrdRowReflect101>,
row_filter::caller<25, T, D, BrdRowReflect101>,
row_filter::caller<26, T, D, BrdRowReflect101>,
row_filter::caller<27, T, D, BrdRowReflect101>,
row_filter::caller<28, T, D, BrdRowReflect101>,
row_filter::caller<29, T, D, BrdRowReflect101>,
row_filter::caller<30, T, D, BrdRowReflect101>,
row_filter::caller<31, T, D, BrdRowReflect101>,
row_filter::caller<32, T, D, BrdRowReflect101>
},
{
0,
::caller< 1, T, D, BrdRowReplicate>,
::caller< 2, T, D, BrdRowReplicate>,
::caller< 3, T, D, BrdRowReplicate>,
::caller< 4, T, D, BrdRowReplicate>,
::caller< 5, T, D, BrdRowReplicate>,
::caller< 6, T, D, BrdRowReplicate>,
::caller< 7, T, D, BrdRowReplicate>,
::caller< 8, T, D, BrdRowReplicate>,
::caller< 9, T, D, BrdRowReplicate>,
::caller<10, T, D, BrdRowReplicate>,
::caller<11, T, D, BrdRowReplicate>,
::caller<12, T, D, BrdRowReplicate>,
::caller<13, T, D, BrdRowReplicate>,
::caller<14, T, D, BrdRowReplicate>,
::caller<15, T, D, BrdRowReplicate>,
::caller<16, T, D, BrdRowReplicate>,
::caller<17, T, D, BrdRowReplicate>,
::caller<18, T, D, BrdRowReplicate>,
::caller<19, T, D, BrdRowReplicate>,
::caller<20, T, D, BrdRowReplicate>,
::caller<21, T, D, BrdRowReplicate>,
::caller<22, T, D, BrdRowReplicate>,
::caller<23, T, D, BrdRowReplicate>,
::caller<24, T, D, BrdRowReplicate>,
::caller<25, T, D, BrdRowReplicate>,
::caller<26, T, D, BrdRowReplicate>,
::caller<27, T, D, BrdRowReplicate>,
::caller<28, T, D, BrdRowReplicate>,
::caller<29, T, D, BrdRowReplicate>,
::caller<30, T, D, BrdRowReplicate>,
::caller<31, T, D, BrdRowReplicate>,
::caller<32, T, D, BrdRowReplicate>
row_filter::caller< 1, T, D, BrdRowReplicate>,
row_filter::caller< 2, T, D, BrdRowReplicate>,
row_filter::caller< 3, T, D, BrdRowReplicate>,
row_filter::caller< 4, T, D, BrdRowReplicate>,
row_filter::caller< 5, T, D, BrdRowReplicate>,
row_filter::caller< 6, T, D, BrdRowReplicate>,
row_filter::caller< 7, T, D, BrdRowReplicate>,
row_filter::caller< 8, T, D, BrdRowReplicate>,
row_filter::caller< 9, T, D, BrdRowReplicate>,
row_filter::caller<10, T, D, BrdRowReplicate>,
row_filter::caller<11, T, D, BrdRowReplicate>,
row_filter::caller<12, T, D, BrdRowReplicate>,
row_filter::caller<13, T, D, BrdRowReplicate>,
row_filter::caller<14, T, D, BrdRowReplicate>,
row_filter::caller<15, T, D, BrdRowReplicate>,
row_filter::caller<16, T, D, BrdRowReplicate>,
row_filter::caller<17, T, D, BrdRowReplicate>,
row_filter::caller<18, T, D, BrdRowReplicate>,
row_filter::caller<19, T, D, BrdRowReplicate>,
row_filter::caller<20, T, D, BrdRowReplicate>,
row_filter::caller<21, T, D, BrdRowReplicate>,
row_filter::caller<22, T, D, BrdRowReplicate>,
row_filter::caller<23, T, D, BrdRowReplicate>,
row_filter::caller<24, T, D, BrdRowReplicate>,
row_filter::caller<25, T, D, BrdRowReplicate>,
row_filter::caller<26, T, D, BrdRowReplicate>,
row_filter::caller<27, T, D, BrdRowReplicate>,
row_filter::caller<28, T, D, BrdRowReplicate>,
row_filter::caller<29, T, D, BrdRowReplicate>,
row_filter::caller<30, T, D, BrdRowReplicate>,
row_filter::caller<31, T, D, BrdRowReplicate>,
row_filter::caller<32, T, D, BrdRowReplicate>
},
{
0,
::caller< 1, T, D, BrdRowConstant>,
::caller< 2, T, D, BrdRowConstant>,
::caller< 3, T, D, BrdRowConstant>,
::caller< 4, T, D, BrdRowConstant>,
::caller< 5, T, D, BrdRowConstant>,
::caller< 6, T, D, BrdRowConstant>,
::caller< 7, T, D, BrdRowConstant>,
::caller< 8, T, D, BrdRowConstant>,
::caller< 9, T, D, BrdRowConstant>,
::caller<10, T, D, BrdRowConstant>,
::caller<11, T, D, BrdRowConstant>,
::caller<12, T, D, BrdRowConstant>,
::caller<13, T, D, BrdRowConstant>,
::caller<14, T, D, BrdRowConstant>,
::caller<15, T, D, BrdRowConstant>,
::caller<16, T, D, BrdRowConstant>,
::caller<17, T, D, BrdRowConstant>,
::caller<18, T, D, BrdRowConstant>,
::caller<19, T, D, BrdRowConstant>,
::caller<20, T, D, BrdRowConstant>,
::caller<21, T, D, BrdRowConstant>,
::caller<22, T, D, BrdRowConstant>,
::caller<23, T, D, BrdRowConstant>,
::caller<24, T, D, BrdRowConstant>,
::caller<25, T, D, BrdRowConstant>,
::caller<26, T, D, BrdRowConstant>,
::caller<27, T, D, BrdRowConstant>,
::caller<28, T, D, BrdRowConstant>,
::caller<29, T, D, BrdRowConstant>,
::caller<30, T, D, BrdRowConstant>,
::caller<31, T, D, BrdRowConstant>,
::caller<32, T, D, BrdRowConstant>
row_filter::caller< 1, T, D, BrdRowConstant>,
row_filter::caller< 2, T, D, BrdRowConstant>,
row_filter::caller< 3, T, D, BrdRowConstant>,
row_filter::caller< 4, T, D, BrdRowConstant>,
row_filter::caller< 5, T, D, BrdRowConstant>,
row_filter::caller< 6, T, D, BrdRowConstant>,
row_filter::caller< 7, T, D, BrdRowConstant>,
row_filter::caller< 8, T, D, BrdRowConstant>,
row_filter::caller< 9, T, D, BrdRowConstant>,
row_filter::caller<10, T, D, BrdRowConstant>,
row_filter::caller<11, T, D, BrdRowConstant>,
row_filter::caller<12, T, D, BrdRowConstant>,
row_filter::caller<13, T, D, BrdRowConstant>,
row_filter::caller<14, T, D, BrdRowConstant>,
row_filter::caller<15, T, D, BrdRowConstant>,
row_filter::caller<16, T, D, BrdRowConstant>,
row_filter::caller<17, T, D, BrdRowConstant>,
row_filter::caller<18, T, D, BrdRowConstant>,
row_filter::caller<19, T, D, BrdRowConstant>,
row_filter::caller<20, T, D, BrdRowConstant>,
row_filter::caller<21, T, D, BrdRowConstant>,
row_filter::caller<22, T, D, BrdRowConstant>,
row_filter::caller<23, T, D, BrdRowConstant>,
row_filter::caller<24, T, D, BrdRowConstant>,
row_filter::caller<25, T, D, BrdRowConstant>,
row_filter::caller<26, T, D, BrdRowConstant>,
row_filter::caller<27, T, D, BrdRowConstant>,
row_filter::caller<28, T, D, BrdRowConstant>,
row_filter::caller<29, T, D, BrdRowConstant>,
row_filter::caller<30, T, D, BrdRowConstant>,
row_filter::caller<31, T, D, BrdRowConstant>,
row_filter::caller<32, T, D, BrdRowConstant>
},
{
0,
::caller< 1, T, D, BrdRowReflect>,
::caller< 2, T, D, BrdRowReflect>,
::caller< 3, T, D, BrdRowReflect>,
::caller< 4, T, D, BrdRowReflect>,
::caller< 5, T, D, BrdRowReflect>,
::caller< 6, T, D, BrdRowReflect>,
::caller< 7, T, D, BrdRowReflect>,
::caller< 8, T, D, BrdRowReflect>,
::caller< 9, T, D, BrdRowReflect>,
::caller<10, T, D, BrdRowReflect>,
::caller<11, T, D, BrdRowReflect>,
::caller<12, T, D, BrdRowReflect>,
::caller<13, T, D, BrdRowReflect>,
::caller<14, T, D, BrdRowReflect>,
::caller<15, T, D, BrdRowReflect>,
::caller<16, T, D, BrdRowReflect>,
::caller<17, T, D, BrdRowReflect>,
::caller<18, T, D, BrdRowReflect>,
::caller<19, T, D, BrdRowReflect>,
::caller<20, T, D, BrdRowReflect>,
::caller<21, T, D, BrdRowReflect>,
::caller<22, T, D, BrdRowReflect>,
::caller<23, T, D, BrdRowReflect>,
::caller<24, T, D, BrdRowReflect>,
::caller<25, T, D, BrdRowReflect>,
::caller<26, T, D, BrdRowReflect>,
::caller<27, T, D, BrdRowReflect>,
::caller<28, T, D, BrdRowReflect>,
::caller<29, T, D, BrdRowReflect>,
::caller<30, T, D, BrdRowReflect>,
::caller<31, T, D, BrdRowReflect>,
::caller<32, T, D, BrdRowReflect>
row_filter::caller< 1, T, D, BrdRowReflect>,
row_filter::caller< 2, T, D, BrdRowReflect>,
row_filter::caller< 3, T, D, BrdRowReflect>,
row_filter::caller< 4, T, D, BrdRowReflect>,
row_filter::caller< 5, T, D, BrdRowReflect>,
row_filter::caller< 6, T, D, BrdRowReflect>,
row_filter::caller< 7, T, D, BrdRowReflect>,
row_filter::caller< 8, T, D, BrdRowReflect>,
row_filter::caller< 9, T, D, BrdRowReflect>,
row_filter::caller<10, T, D, BrdRowReflect>,
row_filter::caller<11, T, D, BrdRowReflect>,
row_filter::caller<12, T, D, BrdRowReflect>,
row_filter::caller<13, T, D, BrdRowReflect>,
row_filter::caller<14, T, D, BrdRowReflect>,
row_filter::caller<15, T, D, BrdRowReflect>,
row_filter::caller<16, T, D, BrdRowReflect>,
row_filter::caller<17, T, D, BrdRowReflect>,
row_filter::caller<18, T, D, BrdRowReflect>,
row_filter::caller<19, T, D, BrdRowReflect>,
row_filter::caller<20, T, D, BrdRowReflect>,
row_filter::caller<21, T, D, BrdRowReflect>,
row_filter::caller<22, T, D, BrdRowReflect>,
row_filter::caller<23, T, D, BrdRowReflect>,
row_filter::caller<24, T, D, BrdRowReflect>,
row_filter::caller<25, T, D, BrdRowReflect>,
row_filter::caller<26, T, D, BrdRowReflect>,
row_filter::caller<27, T, D, BrdRowReflect>,
row_filter::caller<28, T, D, BrdRowReflect>,
row_filter::caller<29, T, D, BrdRowReflect>,
row_filter::caller<30, T, D, BrdRowReflect>,
row_filter::caller<31, T, D, BrdRowReflect>,
row_filter::caller<32, T, D, BrdRowReflect>
},
{
0,
::caller< 1, T, D, BrdRowWrap>,
::caller< 2, T, D, BrdRowWrap>,
::caller< 3, T, D, BrdRowWrap>,
::caller< 4, T, D, BrdRowWrap>,
::caller< 5, T, D, BrdRowWrap>,
::caller< 6, T, D, BrdRowWrap>,
::caller< 7, T, D, BrdRowWrap>,
::caller< 8, T, D, BrdRowWrap>,
::caller< 9, T, D, BrdRowWrap>,
::caller<10, T, D, BrdRowWrap>,
::caller<11, T, D, BrdRowWrap>,
::caller<12, T, D, BrdRowWrap>,
::caller<13, T, D, BrdRowWrap>,
::caller<14, T, D, BrdRowWrap>,
::caller<15, T, D, BrdRowWrap>,
::caller<16, T, D, BrdRowWrap>,
::caller<17, T, D, BrdRowWrap>,
::caller<18, T, D, BrdRowWrap>,
::caller<19, T, D, BrdRowWrap>,
::caller<20, T, D, BrdRowWrap>,
::caller<21, T, D, BrdRowWrap>,
::caller<22, T, D, BrdRowWrap>,
::caller<23, T, D, BrdRowWrap>,
::caller<24, T, D, BrdRowWrap>,
::caller<25, T, D, BrdRowWrap>,
::caller<26, T, D, BrdRowWrap>,
::caller<27, T, D, BrdRowWrap>,
::caller<28, T, D, BrdRowWrap>,
::caller<29, T, D, BrdRowWrap>,
::caller<30, T, D, BrdRowWrap>,
::caller<31, T, D, BrdRowWrap>,
::caller<32, T, D, BrdRowWrap>
row_filter::caller< 1, T, D, BrdRowWrap>,
row_filter::caller< 2, T, D, BrdRowWrap>,
row_filter::caller< 3, T, D, BrdRowWrap>,
row_filter::caller< 4, T, D, BrdRowWrap>,
row_filter::caller< 5, T, D, BrdRowWrap>,
row_filter::caller< 6, T, D, BrdRowWrap>,
row_filter::caller< 7, T, D, BrdRowWrap>,
row_filter::caller< 8, T, D, BrdRowWrap>,
row_filter::caller< 9, T, D, BrdRowWrap>,
row_filter::caller<10, T, D, BrdRowWrap>,
row_filter::caller<11, T, D, BrdRowWrap>,
row_filter::caller<12, T, D, BrdRowWrap>,
row_filter::caller<13, T, D, BrdRowWrap>,
row_filter::caller<14, T, D, BrdRowWrap>,
row_filter::caller<15, T, D, BrdRowWrap>,
row_filter::caller<16, T, D, BrdRowWrap>,
row_filter::caller<17, T, D, BrdRowWrap>,
row_filter::caller<18, T, D, BrdRowWrap>,
row_filter::caller<19, T, D, BrdRowWrap>,
row_filter::caller<20, T, D, BrdRowWrap>,
row_filter::caller<21, T, D, BrdRowWrap>,
row_filter::caller<22, T, D, BrdRowWrap>,
row_filter::caller<23, T, D, BrdRowWrap>,
row_filter::caller<24, T, D, BrdRowWrap>,
row_filter::caller<25, T, D, BrdRowWrap>,
row_filter::caller<26, T, D, BrdRowWrap>,
row_filter::caller<27, T, D, BrdRowWrap>,
row_filter::caller<28, T, D, BrdRowWrap>,
row_filter::caller<29, T, D, BrdRowWrap>,
row_filter::caller<30, T, D, BrdRowWrap>,
row_filter::caller<31, T, D, BrdRowWrap>,
row_filter::caller<32, T, D, BrdRowWrap>
}
};
loadKernel(kernel, ksize, stream);
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}

@ -52,9 +52,9 @@ using namespace cv::gpu::device;
////////////////////////////////////////////////////////////
// centeredGradient
namespace
namespace tvl1flow
{
__global__ void centeredGradient(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
__global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -65,16 +65,13 @@ namespace
dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0)));
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
}
}
namespace tvl1flow
{
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
::centeredGradient<<<grid, block>>>(src, dx, dy);
centeredGradientKernel<<<grid, block>>>(src, dx, dy);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
@ -84,7 +81,7 @@ namespace tvl1flow
////////////////////////////////////////////////////////////
// warpBackward
namespace
namespace tvl1flow
{
static __device__ __forceinline__ float bicubicCoeff(float x_)
{
@ -107,7 +104,7 @@ namespace
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void warpBackward(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
__global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -166,10 +163,7 @@ namespace
const float I0Val = I0(y, x);
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
}
}
namespace tvl1flow
{
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
{
const dim3 block(32, 8);
@ -179,7 +173,7 @@ namespace tvl1flow
bindTexture(&tex_I1x, I1x);
bindTexture(&tex_I1y, I1y);
::warpBackward<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
@ -189,7 +183,7 @@ namespace tvl1flow
////////////////////////////////////////////////////////////
// estimateU
namespace
namespace tvl1flow
{
__device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
{
@ -213,7 +207,7 @@ namespace
}
}
__global__ void estimateU(const PtrStepSzf I1wx, const PtrStepf I1wy,
__global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
const PtrStepf grad, const PtrStepf rho_c,
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
PtrStepf u1, PtrStepf u2, PtrStepf error,
@ -275,10 +269,7 @@ namespace
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error(y, x) = n1 + n2;
}
}
namespace tvl1flow
{
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
@ -288,7 +279,7 @@ namespace tvl1flow
const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
::estimateU<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
@ -298,9 +289,9 @@ namespace tvl1flow
////////////////////////////////////////////////////////////
// estimateDualVariables
namespace
namespace tvl1flow
{
__global__ void estimateDualVariables(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -325,16 +316,13 @@ namespace
p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
}
}
namespace tvl1flow
{
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut)
{
const dim3 block(32, 8);
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
::estimateDualVariables<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );

@ -551,7 +551,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
src.locateROI(whole, offset);
if (info.supports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (src.step - offset.x))
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
{
ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);

@ -210,6 +210,18 @@ bool TestHaarCascadeApplication::process()
#if defined(__GNUC__)
//http://www.christian-seiler.de/projekte/fpmath/
#ifndef _FPU_EXTENDED
#define _FPU_EXTENDED 0
#endif
#ifndef _FPU_DOUBLE
#define _FPU_DOUBLE 0
#endif
#ifndef _FPU_SINGLE
#define _FPU_SINGLE 0
#endif
fpu_control_t fpu_oldcw, fpu_cw;
_FPU_GETCW(fpu_oldcw); // store old cw
fpu_cw = (fpu_oldcw & ~_FPU_EXTENDED & ~_FPU_DOUBLE & ~_FPU_SINGLE) | _FPU_SINGLE;

@ -54,14 +54,8 @@ inline void safeCall_(int code, const char* expr, const char* file, int line)
// Each GPU is associated with its own context
CUcontext contexts[2];
int main(int argc, char **argv)
int main()
{
if (argc > 1)
{
cout << "CUDA driver API sample\n";
return -1;
}
int num_devices = getCudaEnabledDeviceCount();
if (num_devices < 2)
{

@ -76,7 +76,7 @@ GpuMat d_result[2];
// CPU result
Mat result;
void printHelp()
static void printHelp()
{
std::cout << "Usage: driver_api_stereo_multi_gpu --left <left_image> --right <right_image>\n";
}

@ -76,8 +76,7 @@ int main(int argc, char** argv)
cv::gpu::GpuMat dframe(frame), roi(frame.rows, frame.cols, CV_8UC1), trois;
roi.setTo(cv::Scalar::all(1));
cascade.genRoi(roi, trois);
cascade.detect(dframe, trois, objects);
cascade.detect(dframe, roi, objects);
cv::Mat dt(objects);
typedef cv::gpu::SCascade::Detection Detection;

Loading…
Cancel
Save