|
|
|
@ -1250,4 +1250,49 @@ namespace cv { namespace gpu { namespace mathfunc |
|
|
|
|
|
|
|
|
|
} // namespace countnonzero |
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// transpose
|
|
|
|
|
|
|
|
|
// Tiled transpose kernel: each thread block stages one tile of src in shared
// memory and then writes it to dst with rows and columns exchanged.
//
//   src - source matrix; src.rows / src.cols bound both the reads and writes.
//   dst - destination; element (row, col) of src is stored at dst.ptr(col)[row].
//
// The shared tile holds 16 * 17 elements and is addressed with a row stride of
// blockDim.x + 1, so the kernel is sized for the 16x16 thread block that
// transpose_gpu launches. The write-back mixes blockIdx.y with blockDim.x and
// blockIdx.x with blockDim.y, so it expects a square block.
// NOTE(review): the extra element per tile row is presumably padding against
// shared-memory bank conflicts on the column-wise read - confirm.
template <typename T>
__global__ void transpose(const DevMem2D_<T> src, PtrStep_<T> dst)
{
    __shared__ T s_mem[16 * 17];

    const int tx = threadIdx.x;
    const int ty = threadIdx.y;

    // Stage 1: thread (tx, ty) loads its source element into tile slot [ty][tx].
    const int src_col = blockIdx.x * blockDim.x + tx;
    const int src_row = blockIdx.y * blockDim.y + ty;

    if (src_row < src.rows && src_col < src.cols)
        s_mem[ty * blockDim.x + tx + ty] = src.ptr(src_row)[src_col];

    // The barrier sits outside the bounds check so every thread of the block
    // reaches it, including threads that fall outside the matrix.
    __syncthreads();

    // Stage 2: the block indices swap roles, and the thread reads tile slot
    // [tx][ty], i.e. the element loaded by the thread with mirrored indices.
    const int dst_col = blockIdx.y * blockDim.x + tx;
    const int dst_row = blockIdx.x * blockDim.y + ty;

    if (dst_row < src.cols && dst_col < src.rows)
        dst.ptr(dst_row)[dst_col] = s_mem[tx * blockDim.x + ty + tx];
}
|
|
|
|
|
|
|
|
|
// Host-side launcher for the transpose<T> kernel.
//
// Covers the source with a grid of 16x16-thread blocks (divUp(src.cols, 16) by
// divUp(src.rows, 16)), launches the kernel and then waits for the device to
// finish before returning.
//
//   src - source matrix, reinterpreted as elements of type T.
//   dst - destination matrix, reinterpreted as elements of type T. The kernel
//         writes rows [0, src.cols) and columns [0, src.rows) of it, so it
//         must be at least that large.
//
// Launch and execution failures are reported through cudaSafeCall.
template <typename T>
void transpose_gpu(const DevMem2D& src, const DevMem2D& dst)
{
    // An empty source would yield a zero-sized grid, which is not a valid
    // launch configuration; there is nothing to transpose, so do nothing.
    if (src.cols <= 0 || src.rows <= 0)
        return;

    dim3 threads(16, 16, 1);
    dim3 grid(divUp(src.cols, 16), divUp(src.rows, 16), 1);

    transpose<T><<<grid, threads>>>((DevMem2D_<T>)src, (DevMem2D_<T>)dst);

    // A kernel launch returns no status of its own: configuration errors are
    // only visible through the runtime's last-error state, so query it here.
    cudaSafeCall( cudaGetLastError() );

    // Block until the kernel completes so execution errors surface here too.
    cudaSafeCall( cudaThreadSynchronize() );
}
|
|
|
|
|
|
|
|
|
// Explicit instantiations of transpose_gpu for the element types exported
// from this translation unit.
template void transpose_gpu<uchar4>(const DevMem2D&, const DevMem2D&);
template void transpose_gpu<char4>(const DevMem2D&, const DevMem2D&);
template void transpose_gpu<ushort2>(const DevMem2D&, const DevMem2D&);
template void transpose_gpu<short2>(const DevMem2D&, const DevMem2D&);
template void transpose_gpu<int>(const DevMem2D&, const DevMem2D&);
template void transpose_gpu<float>(const DevMem2D&, const DevMem2D&);
|
|
|
|
}}} |
|
|
|
|