@ -46,17 +46,14 @@
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/static_check.hpp"
namespace cv { namespace gpu { namespace device
namespace cv { namespace gpu { namespace device
{
{
#define MAX_KERNEL_SIZE 16
#define BLOCK_DIM_X 16
#define BLOCK_DIM_Y 4
#define RESULT_STEPS 8
#define HALO_STEPS 1
namespace row_filter
namespace row_filter
{
{
#define MAX_KERNEL_SIZE 32
__constant__ float c_kernel[MAX_KERNEL_SIZE];
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float kernel[], int ksize)
void loadKernel(const float kernel[], int ksize)
@ -64,87 +61,74 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
}
}
namespace detail
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int PATCH_PER_BLOCK, int HALO_SIZE, int KSIZE, typename T, typename D, typename B>
{
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
template <typename T, size_t size> struct SmemType
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
};
template <typename T> struct SmemType<T, 4>
{
typedef T smem_t;
};
}
template <typename T> struct SmemType
{
{
typedef typename detail::SmemType<T, sizeof(T)>::smem_t smem_t;
Static<KSIZE <= MAX_KERNEL_SIZE>::check();
};
Static<HALO_SIZE * BLOCK_DIM_X >= KSIZE>::check();
Static<VecTraits<T>::cn == VecTraits<D>::cn>::check();
template <int KERNEL_SIZE, typename T, typename D, typename B>
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
{
typedef typename SmemType<T>::smem_t smem_t;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ smem_t smem[BLOCK_DIM_Y][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_X];
__shared__ typename sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
//Offset to the left halo edge
const int x = (blockIdx.x * RESULT_STEPS - HALO_STEPS) * BLOCK_DIM_X + threadIdx.x;
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;
if (y < src.rows)
if (y >= src.rows)
{
return;
const T* src_row = src.ptr(y);
//Load main data
const T* src_row = src.ptr(y);
#pragma unroll
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_high(i * BLOCK_DIM_X + x, src_row);
//Load left halo
const int xStart = blockIdx.x * (PATCH_PER_BLOCK * BLOCK_DIM_X) + threadIdx.x;
#pragma unroll
for(int i = 0; i < HALO_STEPS; ++i)
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_low(i * BLOCK_DIM_X + x, src_row);
//Load righ t halo
//Load left halo
#pragma unroll
#pragma unroll
for(int i = HALO_STEPS + RESULT_STEPS; i < HALO_STEPS + RESULT_STEPS + HALO_STEPS; ++i )
for (int j = 0; j < HALO_SIZE; ++j )
smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X] = b.at_high(i * BLOCK_DIM_X + x, src_row );
smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row) );
__syncthreads();
//Load main data
#pragma unroll
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));
D* dst_row = dst.ptr(y);
//Load right halo
#pragma unroll
for (int j = 0; j < HALO_SIZE; ++j)
smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
#pragma unroll
__syncthreads();
for(int i = HALO_STEPS; i < HALO_STEPS + RESULT_STEPS; ++i)
{
#pragma unroll
sum_t sum = VecTraits<sum_t>::all(0);
for (int j = 0; j < PATCH_PER_BLOCK; ++j)
{
const int x = xStart + j * BLOCK_DIM_X;
#pragma unroll
if (x >= src.cols)
for (int j = 0; j < KERNEL_SIZE; ++j)
return;
sum = sum + smem[threadIdx.y][threadIdx.x + i * BLOCK_DIM_X + j - anchor] * c_kernel[j];
int dstX = x + i * BLOCK_DIM_X ;
sum_t sum = VecTraits<sum_t>::all(0) ;
if (dstX < src.cols)
#pragma unroll
dst_row[dstX] = saturate_cast<D>(sum);
for (int k = 0; k < KSIZE; ++k)
}
sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
dst(y, x) = saturate_cast<D>(sum);
}
}
}
}
template <int ksize , typename T, typename D, template<typename> class B>
template <int KSIZE , typename T, typename D, template<typename> class B>
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
void linearRowFilter_caller(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, cudaStream_t stream)
{
{
typedef typename SmemType<T>::smem_t smem_t;
const int BLOCK_DIM_X = 32;
const int BLOCK_DIM_Y = 8;
const int PATCH_PER_BLOCK = 4;
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, RESULT_STEPS * BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
const dim3 grid(divUp(src.cols, BLOCK_DIM_X * PATCH_PER_BLOCK ), divUp(src.rows, BLOCK_DIM_Y));
B<smem_t> b (src.cols);
B<T> brd (src.cols);
linearRowFilter<ksize , T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, PATCH_PER_BLOCK, 1, KSIZE , T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd );
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
if (stream == 0)
@ -152,106 +136,187 @@ namespace cv { namespace gpu { namespace device
}
}
template <typename T, typename D>
template <typename T, typename D>
void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
void linearRowFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
{
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, cudaStream_t stream);
static const caller_t callers[5][17] =
static const caller_t callers[5][33] =
{
{
{
{
0,
0,
linearRowFilter_caller<1 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 1 , T, D, BrdRowReflect101>,
linearRowFilter_caller<2 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 2 , T, D, BrdRowReflect101>,
linearRowFilter_caller<3 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 3 , T, D, BrdRowReflect101>,
linearRowFilter_caller<4 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 4 , T, D, BrdRowReflect101>,
linearRowFilter_caller<5 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 5 , T, D, BrdRowReflect101>,
linearRowFilter_caller<6 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 6 , T, D, BrdRowReflect101>,
linearRowFilter_caller<7 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 7 , T, D, BrdRowReflect101>,
linearRowFilter_caller<8 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 8 , T, D, BrdRowReflect101>,
linearRowFilter_caller<9 , T, D, BrdRowReflect101>,
linearRowFilter_caller< 9 , T, D, BrdRowReflect101>,
linearRowFilter_caller<10, T, D, BrdRowReflect101>,
linearRowFilter_caller<10, T, D, BrdRowReflect101>,
linearRowFilter_caller<11, T, D, BrdRowReflect101>,
linearRowFilter_caller<11, T, D, BrdRowReflect101>,
linearRowFilter_caller<12, T, D, BrdRowReflect101>,
linearRowFilter_caller<12, T, D, BrdRowReflect101>,
linearRowFilter_caller<13, T, D, BrdRowReflect101>,
linearRowFilter_caller<13, T, D, BrdRowReflect101>,
linearRowFilter_caller<14, T, D, BrdRowReflect101>,
linearRowFilter_caller<14, T, D, BrdRowReflect101>,
linearRowFilter_caller<15, T, D, BrdRowReflect101>,
linearRowFilter_caller<15, T, D, BrdRowReflect101>,
linearRowFilter_caller<16, T, D, BrdRowReflect101>
linearRowFilter_caller<16, T, D, BrdRowReflect101>,
linearRowFilter_caller<17, T, D, BrdRowReflect101>,
linearRowFilter_caller<18, T, D, BrdRowReflect101>,
linearRowFilter_caller<19, T, D, BrdRowReflect101>,
linearRowFilter_caller<20, T, D, BrdRowReflect101>,
linearRowFilter_caller<21, T, D, BrdRowReflect101>,
linearRowFilter_caller<22, T, D, BrdRowReflect101>,
linearRowFilter_caller<23, T, D, BrdRowReflect101>,
linearRowFilter_caller<24, T, D, BrdRowReflect101>,
linearRowFilter_caller<25, T, D, BrdRowReflect101>,
linearRowFilter_caller<26, T, D, BrdRowReflect101>,
linearRowFilter_caller<27, T, D, BrdRowReflect101>,
linearRowFilter_caller<28, T, D, BrdRowReflect101>,
linearRowFilter_caller<29, T, D, BrdRowReflect101>,
linearRowFilter_caller<30, T, D, BrdRowReflect101>,
linearRowFilter_caller<31, T, D, BrdRowReflect101>,
linearRowFilter_caller<32, T, D, BrdRowReflect101>
},
},
{
{
0,
0,
linearRowFilter_caller<1 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 1 , T, D, BrdRowReplicate>,
linearRowFilter_caller<2 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 2 , T, D, BrdRowReplicate>,
linearRowFilter_caller<3 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 3 , T, D, BrdRowReplicate>,
linearRowFilter_caller<4 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 4 , T, D, BrdRowReplicate>,
linearRowFilter_caller<5 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 5 , T, D, BrdRowReplicate>,
linearRowFilter_caller<6 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 6 , T, D, BrdRowReplicate>,
linearRowFilter_caller<7 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 7 , T, D, BrdRowReplicate>,
linearRowFilter_caller<8 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 8 , T, D, BrdRowReplicate>,
linearRowFilter_caller<9 , T, D, BrdRowReplicate>,
linearRowFilter_caller< 9 , T, D, BrdRowReplicate>,
linearRowFilter_caller<10, T, D, BrdRowReplicate>,
linearRowFilter_caller<10, T, D, BrdRowReplicate>,
linearRowFilter_caller<11, T, D, BrdRowReplicate>,
linearRowFilter_caller<11, T, D, BrdRowReplicate>,
linearRowFilter_caller<12, T, D, BrdRowReplicate>,
linearRowFilter_caller<12, T, D, BrdRowReplicate>,
linearRowFilter_caller<13, T, D, BrdRowReplicate>,
linearRowFilter_caller<13, T, D, BrdRowReplicate>,
linearRowFilter_caller<14, T, D, BrdRowReplicate>,
linearRowFilter_caller<14, T, D, BrdRowReplicate>,
linearRowFilter_caller<15, T, D, BrdRowReplicate>,
linearRowFilter_caller<15, T, D, BrdRowReplicate>,
linearRowFilter_caller<16, T, D, BrdRowReplicate>
linearRowFilter_caller<16, T, D, BrdRowReplicate>,
linearRowFilter_caller<17, T, D, BrdRowReplicate>,
linearRowFilter_caller<18, T, D, BrdRowReplicate>,
linearRowFilter_caller<19, T, D, BrdRowReplicate>,
linearRowFilter_caller<20, T, D, BrdRowReplicate>,
linearRowFilter_caller<21, T, D, BrdRowReplicate>,
linearRowFilter_caller<22, T, D, BrdRowReplicate>,
linearRowFilter_caller<23, T, D, BrdRowReplicate>,
linearRowFilter_caller<24, T, D, BrdRowReplicate>,
linearRowFilter_caller<25, T, D, BrdRowReplicate>,
linearRowFilter_caller<26, T, D, BrdRowReplicate>,
linearRowFilter_caller<27, T, D, BrdRowReplicate>,
linearRowFilter_caller<28, T, D, BrdRowReplicate>,
linearRowFilter_caller<29, T, D, BrdRowReplicate>,
linearRowFilter_caller<30, T, D, BrdRowReplicate>,
linearRowFilter_caller<31, T, D, BrdRowReplicate>,
linearRowFilter_caller<32, T, D, BrdRowReplicate>
},
},
{
{
0,
0,
linearRowFilter_caller<1 , T, D, BrdRowConstant>,
linearRowFilter_caller< 1 , T, D, BrdRowConstant>,
linearRowFilter_caller<2 , T, D, BrdRowConstant>,
linearRowFilter_caller< 2 , T, D, BrdRowConstant>,
linearRowFilter_caller<3 , T, D, BrdRowConstant>,
linearRowFilter_caller< 3 , T, D, BrdRowConstant>,
linearRowFilter_caller<4 , T, D, BrdRowConstant>,
linearRowFilter_caller< 4 , T, D, BrdRowConstant>,
linearRowFilter_caller<5 , T, D, BrdRowConstant>,
linearRowFilter_caller< 5 , T, D, BrdRowConstant>,
linearRowFilter_caller<6 , T, D, BrdRowConstant>,
linearRowFilter_caller< 6 , T, D, BrdRowConstant>,
linearRowFilter_caller<7 , T, D, BrdRowConstant>,
linearRowFilter_caller< 7 , T, D, BrdRowConstant>,
linearRowFilter_caller<8 , T, D, BrdRowConstant>,
linearRowFilter_caller< 8 , T, D, BrdRowConstant>,
linearRowFilter_caller<9 , T, D, BrdRowConstant>,
linearRowFilter_caller< 9 , T, D, BrdRowConstant>,
linearRowFilter_caller<10, T, D, BrdRowConstant>,
linearRowFilter_caller<10, T, D, BrdRowConstant>,
linearRowFilter_caller<11, T, D, BrdRowConstant>,
linearRowFilter_caller<11, T, D, BrdRowConstant>,
linearRowFilter_caller<12, T, D, BrdRowConstant>,
linearRowFilter_caller<12, T, D, BrdRowConstant>,
linearRowFilter_caller<13, T, D, BrdRowConstant>,
linearRowFilter_caller<13, T, D, BrdRowConstant>,
linearRowFilter_caller<14, T, D, BrdRowConstant>,
linearRowFilter_caller<14, T, D, BrdRowConstant>,
linearRowFilter_caller<15, T, D, BrdRowConstant>,
linearRowFilter_caller<15, T, D, BrdRowConstant>,
linearRowFilter_caller<16, T, D, BrdRowConstant>
linearRowFilter_caller<16, T, D, BrdRowConstant>,
linearRowFilter_caller<17, T, D, BrdRowConstant>,
linearRowFilter_caller<18, T, D, BrdRowConstant>,
linearRowFilter_caller<19, T, D, BrdRowConstant>,
linearRowFilter_caller<20, T, D, BrdRowConstant>,
linearRowFilter_caller<21, T, D, BrdRowConstant>,
linearRowFilter_caller<22, T, D, BrdRowConstant>,
linearRowFilter_caller<23, T, D, BrdRowConstant>,
linearRowFilter_caller<24, T, D, BrdRowConstant>,
linearRowFilter_caller<25, T, D, BrdRowConstant>,
linearRowFilter_caller<26, T, D, BrdRowConstant>,
linearRowFilter_caller<27, T, D, BrdRowConstant>,
linearRowFilter_caller<28, T, D, BrdRowConstant>,
linearRowFilter_caller<29, T, D, BrdRowConstant>,
linearRowFilter_caller<30, T, D, BrdRowConstant>,
linearRowFilter_caller<31, T, D, BrdRowConstant>,
linearRowFilter_caller<32, T, D, BrdRowConstant>
},
},
{
{
0,
0,
linearRowFilter_caller<1 , T, D, BrdRowReflect>,
linearRowFilter_caller< 1 , T, D, BrdRowReflect>,
linearRowFilter_caller<2 , T, D, BrdRowReflect>,
linearRowFilter_caller< 2 , T, D, BrdRowReflect>,
linearRowFilter_caller<3 , T, D, BrdRowReflect>,
linearRowFilter_caller< 3 , T, D, BrdRowReflect>,
linearRowFilter_caller<4 , T, D, BrdRowReflect>,
linearRowFilter_caller< 4 , T, D, BrdRowReflect>,
linearRowFilter_caller<5 , T, D, BrdRowReflect>,
linearRowFilter_caller< 5 , T, D, BrdRowReflect>,
linearRowFilter_caller<6 , T, D, BrdRowReflect>,
linearRowFilter_caller< 6 , T, D, BrdRowReflect>,
linearRowFilter_caller<7 , T, D, BrdRowReflect>,
linearRowFilter_caller< 7 , T, D, BrdRowReflect>,
linearRowFilter_caller<8 , T, D, BrdRowReflect>,
linearRowFilter_caller< 8 , T, D, BrdRowReflect>,
linearRowFilter_caller<9 , T, D, BrdRowReflect>,
linearRowFilter_caller< 9 , T, D, BrdRowReflect>,
linearRowFilter_caller<10, T, D, BrdRowReflect>,
linearRowFilter_caller<10, T, D, BrdRowReflect>,
linearRowFilter_caller<11, T, D, BrdRowReflect>,
linearRowFilter_caller<11, T, D, BrdRowReflect>,
linearRowFilter_caller<12, T, D, BrdRowReflect>,
linearRowFilter_caller<12, T, D, BrdRowReflect>,
linearRowFilter_caller<13, T, D, BrdRowReflect>,
linearRowFilter_caller<13, T, D, BrdRowReflect>,
linearRowFilter_caller<14, T, D, BrdRowReflect>,
linearRowFilter_caller<14, T, D, BrdRowReflect>,
linearRowFilter_caller<15, T, D, BrdRowReflect>,
linearRowFilter_caller<15, T, D, BrdRowReflect>,
linearRowFilter_caller<16, T, D, BrdRowReflect>
linearRowFilter_caller<16, T, D, BrdRowReflect>,
linearRowFilter_caller<17, T, D, BrdRowReflect>,
linearRowFilter_caller<18, T, D, BrdRowReflect>,
linearRowFilter_caller<19, T, D, BrdRowReflect>,
linearRowFilter_caller<20, T, D, BrdRowReflect>,
linearRowFilter_caller<21, T, D, BrdRowReflect>,
linearRowFilter_caller<22, T, D, BrdRowReflect>,
linearRowFilter_caller<23, T, D, BrdRowReflect>,
linearRowFilter_caller<24, T, D, BrdRowReflect>,
linearRowFilter_caller<25, T, D, BrdRowReflect>,
linearRowFilter_caller<26, T, D, BrdRowReflect>,
linearRowFilter_caller<27, T, D, BrdRowReflect>,
linearRowFilter_caller<28, T, D, BrdRowReflect>,
linearRowFilter_caller<29, T, D, BrdRowReflect>,
linearRowFilter_caller<30, T, D, BrdRowReflect>,
linearRowFilter_caller<31, T, D, BrdRowReflect>,
linearRowFilter_caller<32, T, D, BrdRowReflect>
},
},
{
{
0,
0,
linearRowFilter_caller<1 , T, D, BrdRowWrap>,
linearRowFilter_caller< 1 , T, D, BrdRowWrap>,
linearRowFilter_caller<2 , T, D, BrdRowWrap>,
linearRowFilter_caller< 2 , T, D, BrdRowWrap>,
linearRowFilter_caller<3 , T, D, BrdRowWrap>,
linearRowFilter_caller< 3 , T, D, BrdRowWrap>,
linearRowFilter_caller<4 , T, D, BrdRowWrap>,
linearRowFilter_caller< 4 , T, D, BrdRowWrap>,
linearRowFilter_caller<5 , T, D, BrdRowWrap>,
linearRowFilter_caller< 5 , T, D, BrdRowWrap>,
linearRowFilter_caller<6 , T, D, BrdRowWrap>,
linearRowFilter_caller< 6 , T, D, BrdRowWrap>,
linearRowFilter_caller<7 , T, D, BrdRowWrap>,
linearRowFilter_caller< 7 , T, D, BrdRowWrap>,
linearRowFilter_caller<8 , T, D, BrdRowWrap>,
linearRowFilter_caller< 8 , T, D, BrdRowWrap>,
linearRowFilter_caller<9 , T, D, BrdRowWrap>,
linearRowFilter_caller< 9 , T, D, BrdRowWrap>,
linearRowFilter_caller<10, T, D, BrdRowWrap>,
linearRowFilter_caller<10, T, D, BrdRowWrap>,
linearRowFilter_caller<11, T, D, BrdRowWrap>,
linearRowFilter_caller<11, T, D, BrdRowWrap>,
linearRowFilter_caller<12, T, D, BrdRowWrap>,
linearRowFilter_caller<12, T, D, BrdRowWrap>,
linearRowFilter_caller<13, T, D, BrdRowWrap>,
linearRowFilter_caller<13, T, D, BrdRowWrap>,
linearRowFilter_caller<14, T, D, BrdRowWrap>,
linearRowFilter_caller<14, T, D, BrdRowWrap>,
linearRowFilter_caller<15, T, D, BrdRowWrap>,
linearRowFilter_caller<15, T, D, BrdRowWrap>,
linearRowFilter_caller<16, T, D, BrdRowWrap>
linearRowFilter_caller<16, T, D, BrdRowWrap>,
}
linearRowFilter_caller<17, T, D, BrdRowWrap>,
linearRowFilter_caller<18, T, D, BrdRowWrap>,
linearRowFilter_caller<19, T, D, BrdRowWrap>,
linearRowFilter_caller<20, T, D, BrdRowWrap>,
linearRowFilter_caller<21, T, D, BrdRowWrap>,
linearRowFilter_caller<22, T, D, BrdRowWrap>,
linearRowFilter_caller<23, T, D, BrdRowWrap>,
linearRowFilter_caller<24, T, D, BrdRowWrap>,
linearRowFilter_caller<25, T, D, BrdRowWrap>,
linearRowFilter_caller<26, T, D, BrdRowWrap>,
linearRowFilter_caller<27, T, D, BrdRowWrap>,
linearRowFilter_caller<28, T, D, BrdRowWrap>,
linearRowFilter_caller<29, T, D, BrdRowWrap>,
linearRowFilter_caller<30, T, D, BrdRowWrap>,
linearRowFilter_caller<31, T, D, BrdRowWrap>,
linearRowFilter_caller<32, T, D, BrdRowWrap>
}
};
};
loadKernel(kernel, ksize);
loadKernel(kernel, ksize);
@ -259,12 +324,10 @@ namespace cv { namespace gpu { namespace device
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
}
}
template void linearRowFilter_gpu<uchar , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<uchar , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<uchar4, float4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<uchar4, float4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearRowFilter_gpu<short , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<short3, float3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearRowFilter_gpu<short2, float2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<int , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<short3, float3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<int , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearRowFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
} // namespace row_filter
} // namespace row_filter
}}} // namespace cv { namespace gpu { namespace device
}}} // namespace cv { namespace gpu { namespace device